├── .gitattributes ├── .gitconfig ├── .gitignore ├── .gitmodules ├── .style.yapf ├── .travis.yml ├── LICENSE ├── README.md ├── app ├── .deps │ ├── .dummy │ ├── 00-load-django.py │ ├── Makefile.config │ ├── beakerx.json │ ├── common.sh │ ├── esper-ipython │ ├── esper-run │ ├── ipython_config.py │ ├── nbconfig │ │ ├── edit.json │ │ ├── notebook.json │ │ └── tree.json │ └── supervisord.conf ├── .dockerignore ├── .gitignore ├── Dockerfile.app ├── Dockerfile.base ├── app │ ├── __init__.py │ ├── settings.py │ ├── urls.py │ └── wsgi.py ├── assets │ ├── css │ │ ├── bootstrap.scss │ │ ├── main.scss │ │ ├── search-input.scss │ │ └── sidebar.scss │ ├── favicon-loading.ico │ ├── favicon.ico │ ├── images │ │ ├── dan_spinning.gif │ │ ├── haotian_spinning.gif │ │ ├── james_spinning.gif │ │ ├── kayvon_spinning.gif │ │ ├── maneesh_spinning.gif │ │ ├── spinner.gif │ │ └── will_spinning.gif │ └── js │ │ ├── Consumer.jsx │ │ ├── Provider.jsx │ │ ├── SearchInput.jsx │ │ ├── Sidebar.jsx │ │ ├── contexts.jsx │ │ └── web.jsx ├── deps │ ├── README.md │ ├── install-rust.sh │ ├── install.old.sh │ └── install.sh ├── esper │ ├── __init__.py │ ├── blackframes.py │ ├── blurriness.py │ ├── captions.py │ ├── clothing_detection.py │ ├── commercial_detection.py │ ├── commercial_detection_rekall.py │ ├── download │ │ ├── Dockerfile │ │ ├── count.sh │ │ ├── deploy.yml │ │ ├── start-cluster.sh │ │ ├── submit.py │ │ └── tasks.py │ ├── embed_google_images.py │ ├── face_detection.py │ ├── face_embedding_scanner.py │ ├── face_embeddings.py │ ├── frame_export.py │ ├── gender_detection.py │ ├── hairstyle_detection.py │ ├── hairstyle_prepare.py │ ├── identity.py │ ├── identity_clusters.py │ ├── identity_detect.py │ ├── ingest.py │ ├── ingest_interview_annotations.py │ ├── ingest_test.py │ ├── ingest_tvnews.py │ ├── interview.py │ ├── kube.py │ ├── load_aligned_transcript.py │ ├── major_canonical_shows.py │ ├── migrate.py │ ├── object_detection.py │ ├── plot_timeline.py │ ├── plot_util.py │ ├── pose_detect.py 
│ ├── prelude.py │ ├── queries │ │ ├── __init__.py │ │ ├── all_faces.py │ │ ├── all_videos.py │ │ ├── interview_with_person_x.py │ │ ├── other_queries.py │ │ ├── panels_rekall.py │ │ ├── panels_sql.py │ │ ├── queries.py │ │ ├── sandbox_labels.py │ │ └── unused_queries.py │ ├── rekall.py │ ├── rekall_parallel.py │ ├── rekall_query.py │ ├── rekall_vgrid.py │ ├── report.py │ ├── scanner_bench.py │ ├── scannerutil.py │ ├── shot_detection.py │ ├── spacy_tokenize.py │ ├── spark.py │ ├── spark_identity.py │ ├── spark_util.py │ ├── sql_dump.py │ ├── supercut.py │ ├── topics.py │ ├── transcript_alignment.py │ ├── update_show.py │ ├── update_tracks.py │ ├── validation.py │ ├── widget.py │ └── write_video.py ├── gunicorn_conf.py ├── manage.py ├── notebooks │ ├── alignment.ipynb │ ├── animatedness_scratch-Copy1.ipynb │ ├── animatedness_scratch.ipynb │ ├── attribute_labeling.ipynb │ ├── cluster_faces_and_export.ipynb │ ├── commercial.ipynb │ ├── corrupted_videos.ipynb │ ├── db_perf.ipynb │ ├── diarization.ipynb │ ├── document_figures.ipynb │ ├── download_and_embed_faces.ipynb │ ├── dump_stories_data.ipynb │ ├── duplicate_videos.ipynb │ ├── esper-tutorial.ipynb │ ├── example.ipynb │ ├── export_face_bboxes_for_aws.ipynb │ ├── face_attribute_visual_validation.ipynb │ ├── face_labeling.ipynb │ ├── face_search_eval.ipynb │ ├── facenet.py │ ├── gender.ipynb │ ├── gender_knn_classifier.ipynb │ ├── gender_labeling.ipynb │ ├── get_canonical_shows.ipynb │ ├── hairstyle.ipynb │ ├── identity_comparison_template.ipynb │ ├── identity_label_import.ipynb │ ├── identity_labeling.ipynb │ ├── identity_labeling_l2_only.ipynb │ ├── identity_labeling_uncommon.ipynb │ ├── import_aws_identities.ipynb │ ├── import_canonical_shows.ipynb │ ├── ingest.ipynb │ ├── interview.ipynb │ ├── introduction_detector.ipynb │ ├── labeled_identities │ │ ├── barack_obama.ipynb │ │ ├── bernie_sanders.ipynb │ │ ├── donald_trump.ipynb │ │ ├── donald_trump_on_hardball.ipynb │ │ ├── hillary_clinton.ipynb │ │ ├── hosts │ │ │ 
├── abby_huntsman.ipynb │ │ │ ├── ainsley_earhardt.ipynb │ │ │ ├── alex_witt.ipynb │ │ │ ├── ali_velshi.ipynb │ │ │ ├── alisyn_camerota.ipynb │ │ │ ├── ana_cabrera.ipynb │ │ │ ├── anderson_cooper.ipynb │ │ │ ├── andrea_mitchell.ipynb │ │ │ ├── ari_melber.ipynb │ │ │ ├── arthel_neville.ipynb │ │ │ ├── ayman_mohyeldin.ipynb │ │ │ ├── bill_hemmer.ipynb │ │ │ ├── bill_o_reilly.ipynb │ │ │ ├── bret_baier.ipynb │ │ │ ├── brian_kilmeade.ipynb │ │ │ ├── brian_llenas.ipynb │ │ │ ├── brian_williams.ipynb │ │ │ ├── brit_hume.ipynb │ │ │ ├── brooke_baldwin.ipynb │ │ │ ├── carol_costello.ipynb │ │ │ ├── chris_cuomo.ipynb │ │ │ ├── chris_hayes.ipynb │ │ │ ├── chris_jansing.ipynb │ │ │ ├── chris_matthews.ipynb │ │ │ ├── christi_paul.ipynb │ │ │ ├── christine_romans.ipynb │ │ │ ├── chuck_todd.ipynb │ │ │ ├── craig_melvin.ipynb │ │ │ ├── dana_perino.ipynb │ │ │ ├── dave_briggs.ipynb │ │ │ ├── don_lemon.ipynb │ │ │ ├── doug_mckelway.ipynb │ │ │ ├── elizabeth_prann.ipynb │ │ │ ├── eric_shawn.ipynb │ │ │ ├── erin_burnett.ipynb │ │ │ ├── frances_rivera.ipynb │ │ │ ├── fredricka_whitfield.ipynb │ │ │ ├── greg_gutfeld.ipynb │ │ │ ├── greta_van_susteren.ipynb │ │ │ ├── griff_jenkins.ipynb │ │ │ ├── hallie_jackson.ipynb │ │ │ ├── harris_faulkner.ipynb │ │ │ ├── jake_tapper.ipynb │ │ │ ├── janice_dean.ipynb │ │ │ ├── jeanine_pirro.ipynb │ │ │ ├── jesse_watters.ipynb │ │ │ ├── joe_scarborough.ipynb │ │ │ ├── john_berman.ipynb │ │ │ ├── john_scott.ipynb │ │ │ ├── juan_williams.ipynb │ │ │ ├── julie_banderas.ipynb │ │ │ ├── kate_bolduan.ipynb │ │ │ ├── kate_snow.ipynb │ │ │ ├── katy_tur.ipynb │ │ │ ├── kelly_wright.ipynb │ │ │ ├── kimberly_guilfoyle.ipynb │ │ │ ├── lawrence_o_donnell.ipynb │ │ │ ├── leland_vittert.ipynb │ │ │ ├── louis_burgdorf.ipynb │ │ │ ├── martha_maccallum.ipynb │ │ │ ├── megyn_kelly.ipynb │ │ │ ├── melissa_francis.ipynb │ │ │ ├── michael_cohen.ipynb │ │ │ ├── mika_brzezinski.ipynb │ │ │ ├── molly_henneberg.ipynb │ │ │ ├── neil_cavuto.ipynb │ │ │ ├── pete_hegseth.ipynb │ │ 
│ ├── poppy_harlow.ipynb │ │ │ ├── rachel_maddow.ipynb │ │ │ ├── richard_lui.ipynb │ │ │ ├── sandra_smith.ipynb │ │ │ ├── sean_hannity.ipynb │ │ │ ├── shannon_bream.ipynb │ │ │ ├── shepard_smith.ipynb │ │ │ ├── stephanie_ruhle.ipynb │ │ │ ├── steve_doocy.ipynb │ │ │ ├── steve_kornacki.ipynb │ │ │ ├── tamron_hall.ipynb │ │ │ ├── thomas_roberts.ipynb │ │ │ ├── tom_shillue.ipynb │ │ │ ├── tucker_carlson.ipynb │ │ │ ├── uma_pemmaraju.ipynb │ │ │ ├── victor_blackwell.ipynb │ │ │ ├── walter_scott.ipynb │ │ │ ├── willie_geist.ipynb │ │ │ ├── wolf_blitzer.ipynb │ │ │ └── yasmin_vossoughian.ipynb │ │ ├── ivanka_trump.ipynb │ │ ├── jared_kushner.ipynb │ │ ├── john_mccain.ipynb │ │ ├── kellyanne_conway.ipynb │ │ ├── paul_manafort.ipynb │ │ ├── robert_mueller.ipynb │ │ ├── sarah_huckabee_sanders.ipynb │ │ ├── sean_spicer.ipynb │ │ ├── shooters │ │ │ ├── chris_harper_mercer.ipynb │ │ │ ├── devin_patrick_kelley.ipynb │ │ │ ├── dylann_roof.ipynb │ │ │ ├── esteban_santiago_ruiz.ipynb │ │ │ ├── gavin_eugene_long.ipynb │ │ │ ├── micah_xavier_johnson.ipynb │ │ │ ├── muhammad_youssef_abdulazeez.ipynb │ │ │ ├── omar_mateen.ipynb │ │ │ ├── robert_lewis_dear.ipynb │ │ │ ├── stephen_paddock.ipynb │ │ │ ├── syed_rizwan_farook.ipynb │ │ │ ├── tashfeen_malik.ipynb │ │ │ └── willie_corey_godbolt.ipynb │ │ └── vladimir_putin.ipynb │ ├── michaela_notebooks │ │ ├── Analysis of Shooters and Victims │ │ │ ├── Al Franken-export.txt │ │ │ ├── Al Franken.ipynb │ │ │ ├── Al Franken.txt │ │ │ ├── Alton Sterling (Victim).ipynb │ │ │ ├── Bill Cosby (Celebrity).ipynb │ │ │ ├── Bill Cosby-export.txt │ │ │ ├── Bill Cosby.txt │ │ │ ├── Chris Harper-Mercer (Shooter).ipynb │ │ │ ├── Chris Harper-Mercer.txt │ │ │ ├── Devin Patrick Kelley (Shooter).ipynb │ │ │ ├── Devin Patrick Kelley.txt │ │ │ ├── Dylann Roof (Shooter).ipynb │ │ │ ├── Dylann Roof.txt │ │ │ ├── Eric Garner (Victim).ipynb │ │ │ ├── Eric Garner.txt │ │ │ ├── Esteban Santiago-Ruiz (Shooter).ipynb │ │ │ ├── Esteban Santiago-Ruiz.txt │ │ │ ├── Gavin 
Eugene Long (Shooter).ipynb │ │ │ ├── Gavin Eugene Long.txt │ │ │ ├── George Zimmerman (Shooter).ipynb │ │ │ ├── George Zimmerman-export.txt │ │ │ ├── George Zimmerman.txt │ │ │ ├── Info.txt │ │ │ ├── Kanye West.ipynb │ │ │ ├── Kanye West.txt │ │ │ ├── Laquan McDonald (Victim).ipynb │ │ │ ├── Micah Xavier Johnson (Shooter).ipynb │ │ │ ├── Micah Xavier Johnson.txt │ │ │ ├── Michael Brown (Victim).ipynb │ │ │ ├── Omar Mateen (Shooter).ipynb │ │ │ ├── Omar Mateen.txt │ │ │ ├── Philando Castile (Victim).ipynb │ │ │ ├── Robert Lewis Dear Jr (Shooter).ipynb │ │ │ ├── Robert Lewis Dear Jr.txt │ │ │ ├── Roger Ailes (Celebrity).ipynb │ │ │ ├── Roger Ailes.txt │ │ │ ├── Roger Ailes_export.txt │ │ │ ├── Stephen Paddock (Shooter).ipynb │ │ │ ├── Stephen Paddock.txt │ │ │ ├── Syed Rizwan Farook (Shooter).ipynb │ │ │ ├── Syed Rizwan Farook One.txt │ │ │ ├── Syed Rizwan Farook Two.txt │ │ │ ├── Syed Rizwan Farook-export.txt │ │ │ ├── Syed Rizwan Farook.txt │ │ │ ├── Syed Rizwan Farook_export.txt │ │ │ ├── Tamir Rice (Victim).ipynb │ │ │ ├── Tashfeen Malik (Shooter).ipynb │ │ │ ├── Tashfeen Malik.txt │ │ │ ├── Trayvon Martin (Victim).ipynb │ │ │ ├── Trayvon Martin Final Test.txt │ │ │ ├── Trayvon Martin-export.txt │ │ │ ├── Trayvon Martin.txt │ │ │ ├── Trayvon Martin_export.txt │ │ │ ├── Walter Scott (Victim).ipynb │ │ │ ├── Willie Corey Godbolt (Shooter).ipynb │ │ │ ├── Willie Corey Godbolt.txt │ │ │ ├── roger-ailes-photos │ │ │ │ ├── 123703025.jpg │ │ │ │ ├── 251405426.jpg │ │ │ │ ├── 251529606.jpg │ │ │ │ ├── 251626619.jpg │ │ │ │ ├── 261346406.jpg │ │ │ │ ├── 262510755.jpg │ │ │ │ ├── 268732111.jpg │ │ │ │ ├── 314770412.jpg │ │ │ │ └── 7665011.jpg │ │ │ └── test1.html │ │ ├── Tashfeen Malik.txt │ │ ├── data.txt │ │ ├── do_stills_of_shooters_vary_by_channel.ipynb │ │ ├── do_stills_of_victims_vary_by_channel.ipynb │ │ ├── identity_labeling_uncommon.ipynb │ │ ├── ipywidgets Testing.ipynb │ │ ├── ove_cat.jpg │ │ └── scratch.ipynb │ ├── model_cross_validation.ipynb │ ├── perf.ipynb 
│ ├── problang │ │ ├── custom_mlp.py │ │ ├── transcript_heuristic.ipynb │ │ ├── transcript_labeling.ipynb │ │ ├── transcript_regression.ipynb │ │ ├── transcript_utils.py │ │ └── transcript_vae.ipynb │ ├── propagate_aws_labels.ipynb │ ├── queries.ipynb │ ├── queries_different_languages.ipynb │ ├── queries_v2.ipynb │ ├── queries_v3.ipynb │ ├── questions │ │ ├── are_big_faces_more_male (face height).ipynb │ │ ├── did_visual_depictions_of_comey_change_by_scandal.ipynb │ │ ├── do_hosts_have_big_heads.ipynb │ │ ├── do_stills_of_shooters_vary_by_channel.ipynb │ │ ├── do_stills_of_victims_vary_by_channel.ipynb │ │ ├── do_topics_differ_on_opinion_and_news_shows.ipynb │ │ ├── which_shooter_received_the_most_screentime.ipynb │ │ └── who_said_maga_on_tv.ipynb │ ├── race_knn_classifier.ipynb │ ├── race_labeler_validation.ipynb │ ├── rekall_3d │ │ ├── rekalling_entire_tvnews.ipynb │ │ └── tutorial.ipynb │ ├── rekall_examples.ipynb │ ├── rekall_queries.ipynb │ ├── report.ipynb │ ├── run_ingest_interview_annotations.ipynb │ ├── scratch.ipynb │ ├── senator_search.ipynb │ ├── set_frame_sampler.ipynb │ ├── shots.ipynb │ ├── simple_templates │ │ ├── identity_still_images.ipynb │ │ └── topic_by_show.ipynb │ ├── spark.ipynb │ ├── speaker-recognition.ipynb │ ├── stories.ipynb │ ├── subtitles.ipynb │ ├── supercuts.ipynb │ ├── tableau_csv.ipynb │ ├── test_captions.ipynb │ ├── topic_analysis_workflow.ipynb │ ├── topics │ │ ├── debug_abortion.ipynb │ │ ├── gender_abortion.ipynb │ │ ├── gender_autism.ipynb │ │ ├── gender_election.ipynb │ │ ├── gender_harrassment.ipynb │ │ ├── gender_many_topics.ipynb │ │ ├── gender_many_topics_mentions.ipynb │ │ ├── gender_sandy_hook.ipynb │ │ ├── gender_terrorism.ipynb │ │ ├── gender_trump.ipynb │ │ └── gender_vaccine.ipynb │ ├── validate_aws_identities.ipynb │ ├── validation.ipynb │ ├── vgrid_test.ipynb │ ├── video_timeline_plot_demo.ipynb │ └── widget_scripts │ │ ├── dump_face_data.ipynb │ │ └── dump_server_data.ipynb ├── package.json ├── query │ ├── 
__init__.py │ ├── admin.py │ ├── apps.py │ ├── base_models.py │ ├── management │ │ ├── __init__.py │ │ └── commands │ │ │ ├── __init__.py │ │ │ └── run.py │ ├── migrations │ │ ├── 0001_initial.py │ │ ├── 0002_auto_20180529_1049.py │ │ ├── 0003_show_hosts.py │ │ ├── 0004_auto_20180612_1255.py │ │ ├── 0005_auto_20180612_1316.py │ │ ├── 0006_auto_20180612_1317.py │ │ ├── 0007_canonicalshow_hosts.py │ │ ├── 0008_auto_20180625_1033.py │ │ ├── 0009_object.py │ │ ├── 0010_auto_20180625_1152.py │ │ ├── 0011_auto_20180625_1157.py │ │ ├── 0012_auto_20180712_1252.py │ │ ├── 0013_auto_20180825_2234.py │ │ ├── 0014_auto_20180926_1514.py │ │ ├── 0015_auto_20180926_1902.py │ │ ├── 0016_auto_20180926_2037.py │ │ ├── 0017_auto_20180926_2045.py │ │ ├── 0018_auto_20180926_2045.py │ │ ├── 0019_topic.py │ │ ├── 0020_segment_topics.py │ │ ├── 0021_auto_20180926_2104.py │ │ ├── 0022_auto_20181026_1002.py │ │ ├── 0022_frame_shot_boundary.py │ │ ├── 0023_auto_20181026_1010.py │ │ ├── 0023_auto_20181101_2104.py │ │ ├── 0024_auto_20181103_2019.py │ │ ├── 0024_labeledcommercial_labeledinterview_labeledpanel.py │ │ ├── 0025_merge_20181227_1812.py │ │ ├── 0026_auto_20181227_1812.py │ │ ├── 0027_auto_20181227_1829.py │ │ ├── 0028_auto_20181228_1555.py │ │ ├── 0028_auto_20190101_2032.py │ │ ├── 0029_video_duplicate.py │ │ ├── 0030_video_corrupted.py │ │ ├── 0031_merge_20190105_1016.py │ │ ├── 0032_auto_20190106_1451.py │ │ ├── 0033_auto_20190311_1730.py │ │ ├── 0034_auto_20190711_1442.py │ │ └── __init__.py │ ├── models.py │ ├── templates │ │ ├── base.html │ │ └── index.html │ ├── tests.py │ ├── urls.py │ └── views.py ├── requirements.app.txt ├── requirements.base.txt ├── scripts │ ├── build-tf.sh │ ├── clear-proxy.sh │ ├── create-sample.sh │ ├── export-table.sh │ ├── generate-protobuf.sh │ ├── google-setup.sh │ ├── jupyter-setup.sh │ ├── load-cloud-db.sh │ ├── set-jupyter-password.py │ ├── transfer-to-bigquery.sh │ └── upload-sample.sh ├── test │ ├── README.md │ └── __init__.py └── 
webpack.config.js ├── config ├── google.toml └── local.toml ├── configure.py ├── db ├── .dockerignore ├── Dockerfile └── postgresql.conf ├── guides ├── dev.md └── google.md ├── nginx ├── Dockerfile ├── google.conf.template ├── local.conf.template ├── nginx.conf.template └── subst.sh ├── requirements.txt ├── scripts ├── emergency-reset.py ├── gce-gpu-setup.sh ├── get-bucket-size.sh ├── jupyter-token.sh ├── run-app-tests.sh ├── serve-reset.sh ├── setup-db-csv-dir.sh ├── transfer-db.sh ├── transfer-storage-logs.sh └── travis-build.sh └── spark └── Dockerfile /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb filter=nbstrip -------------------------------------------------------------------------------- /.gitconfig: -------------------------------------------------------------------------------- 1 | [filter "nbstrip"] 2 | clean = "jq --indent 1 \ 3 | '(.cells[] | select(has(\"outputs\")) | .outputs) = [] \ 4 | | (.cells[] | select(has(\"execution_count\")) | .execution_count) = null \ 5 | '" 6 | smudge = cat 7 | required = true -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.sqlite3 2 | *.pyc 3 | *.swp 4 | docker-compose.yml 5 | nvidia-docker-compose.yml 6 | service-key.json 7 | db/data 8 | .scanner.toml 9 | **/__pycache__ 10 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "app/deps/vgrid"] 2 | path = app/deps/vgrid 3 | url = https://github.com/scanner-research/vgrid 4 | [submodule "app/deps/vgrid_jupyter"] 5 | path = app/deps/vgrid_jupyter 6 | url = https://github.com/scanner-research/vgrid_jupyter 7 | [submodule "app/deps/caption-index"] 8 | path = app/deps/caption-index 9 | url = https://github.com/scanner-research/caption-index.git 10 
| [submodule "app/deps/rekall"] 11 | path = app/deps/rekall 12 | url = https://github.com/scanner-research/rekall.git 13 | [submodule "app/deps/rs-embed"] 14 | path = app/deps/rs-embed 15 | url = https://github.com/scanner-research/rs-embed.git 16 | [submodule "app/deps/esper-model-server"] 17 | path = app/deps/esper-model-server 18 | url = https://github.com/scanner-research/esper-model-server.git 19 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = pep8 3 | column_limit = 100 -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.5" 4 | 5 | sudo: required 6 | services: 7 | - docker 8 | 9 | # Use latest docker (need 17.05) 10 | addons: 11 | apt: 12 | packages: 13 | - docker-ce 14 | env: 15 | global: 16 | - DOCKER_REPO=scannerresearch/esper 17 | - DOCKER_EMAIL=wcrichto@cs.stanford.edu 18 | - DOCKER_USER=wcrichto 19 | - secure: "mPNnsLLZfEHbgH/R77wCYY4793Khdhg9Q9xE10hmTumvCAeWyO7oIJUSkONAX9AXlkVNzVMkPVMj3Un9uf5hwmc/HTUgQ8+lCTR+HPnV/CMsRSWGTf2kLSsqxF+wRLM90Vr0PVMXui3YeyanxEh0qbu3HsBjLVIMBiqBrybZx53E1vnNF1/lwQgi7ljpgQHNkkVLBc2yVsQ4izvLq7NlpcmtumEFeeWIft2WN7t4r7W7Q2c+QAADXrTFzrKxJkw4oz8Psn9WQeQlQf2HFdeVKoJJzwtce2QDKUxMpcRpVNIQ3drxBv33u4JMV7zhf/frGycuaKitiL8dLv0aX6rlW72tYh6Cc0SFQo7ZxbbZw18i6wRVLHf2OpzvFACvtbIx+5DCcojga/dbJGjRzn5fOCtF1nXuHz3z/OwQKjzYR/tQGD7Fa2PINQQpD4k+sOOhK3N+cHvvv9tCASUvYV4IXneSrlzlm6k/sTthx0oAZmFTLhTPgCdL2rt9Y1R3e/2p9MUza4uZk3Vh4KKN9NgjCUby42JyXbM9T48+hkiaRzb/NLk0fvU1q5bYFVi5vnsKijHJL3fqEt6Qp2nP+HwK9iXoq2yfayOpw5YHV2T+opr7GC7qVa7bEUmQ1OwCZWbJ3h2Fyfpa3lhkC59xHmAWayRWN9DX0jXsNaMj9y+p1pY=" 20 | 21 | before_install: 22 | - pip3 install -r requirements.txt 23 | 24 | script: 25 | - ./scripts/travis-build.sh 26 | - 
./scripts/run-app-tests.sh 27 | -------------------------------------------------------------------------------- /app/.deps/.dummy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/.deps/.dummy -------------------------------------------------------------------------------- /app/.deps/00-load-django.py: -------------------------------------------------------------------------------- 1 | import os 2 | import django 3 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'app.settings') 4 | django.setup() 5 | -------------------------------------------------------------------------------- /app/.deps/beakerx.json: -------------------------------------------------------------------------------- 1 | { 2 | "beakerx": { 3 | "version": 2, 4 | "jvm_options": { 5 | "heap_GB": null, 6 | "other": [], 7 | "properties": {} 8 | }, 9 | "ui_options": { 10 | "auto_close": true, 11 | "improve_fonts": false, 12 | "wide_cells": false, 13 | "show_publication": false, 14 | "auto_save": true, 15 | "use_data_grid": true 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /app/.deps/common.sh: -------------------------------------------------------------------------------- 1 | alias kc=kubectl 2 | alias kga='kc get all' 3 | alias py=python 4 | 5 | function kpod() { 6 | kc get pod --output=json | jq ".items[] | select(.metadata.labels.app == \"scanner-$1\") | .metadata.name" -r 7 | } 8 | 9 | function klog() { 10 | kc logs ${*:2} po/$(kpod $1) 11 | } 12 | 13 | source /root/.cargo/env 14 | -------------------------------------------------------------------------------- /app/.deps/esper-ipython: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python3 /app/manage.py shell -------------------------------------------------------------------------------- 
/app/.deps/esper-run: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python3 /app/manage.py run -- $* 4 | -------------------------------------------------------------------------------- /app/.deps/ipython_config.py: -------------------------------------------------------------------------------- 1 | c.InteractiveShellApp.extensions = ['autoreload'] 2 | c.InteractiveShellApp.exec_lines = ['%autoreload 2', '%matplotlib inline'] 3 | -------------------------------------------------------------------------------- /app/.deps/nbconfig/edit.json: -------------------------------------------------------------------------------- 1 | { 2 | "load_extensions": { 3 | "codefolding/edit": false 4 | } 5 | } -------------------------------------------------------------------------------- /app/.deps/nbconfig/notebook.json: -------------------------------------------------------------------------------- 1 | { 2 | "toc2": { 3 | "skip_h1_title": false, 4 | "toc_window_display": false, 5 | "markTocItemOnScroll": false, 6 | "widenNotebook": false, 7 | "navigate_menu": false, 8 | "moveMenuLeft": false, 9 | "sideBar": false, 10 | "collapse_to_match_collapsible_headings": false, 11 | "toc_cell": true 12 | }, 13 | "load_extensions": { 14 | "collapsible_headings/main": false, 15 | "toc2/main": true, 16 | "nbextensions_configurator/config_menu/main": true, 17 | "codefolding/main": false, 18 | "hide_input/main": true, 19 | "python-markdown/main": true, 20 | "code_prettify/autopep8": true, 21 | "notify/notify": true, 22 | "contrib_nbextensions_help_item/main": true 23 | }, 24 | "collapsible_headings": { 25 | "collapse_to_match_toc": true 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /app/.deps/nbconfig/tree.json: -------------------------------------------------------------------------------- 1 | { 2 | "load_extensions": { 3 | "nbextensions_configurator/tree_tab/main": true 4 | } 5 | } 
-------------------------------------------------------------------------------- /app/.deps/supervisord.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | nodaemon=true 3 | logfile=/tmp/supervisord.log 4 | pidfile=/tmp/supervisord.pid 5 | user=root 6 | 7 | [program:gunicorn] 8 | command=gunicorn --log-file=- -c gunicorn_conf.py app.wsgi:application --reload 9 | stdout_logfile=/dev/stdout 10 | stdout_logfile_maxbytes=0 11 | stderr_logfile=/dev/stderr 12 | stderr_logfile_maxbytes=0 13 | 14 | [program:notebook] 15 | command=python3 manage.py shell_plus --notebook 16 | stdout_logfile=/dev/stdout 17 | stdout_logfile_maxbytes=0 18 | stderr_logfile=/dev/stderr 19 | stderr_logfile_maxbytes=0 -------------------------------------------------------------------------------- /app/.dockerignore: -------------------------------------------------------------------------------- 1 | # Datbuildebug directories 2 | scanner_db 3 | node_modules 4 | data 5 | deps 6 | google_images 7 | nets 8 | .cache 9 | spark-data 10 | pg 11 | subs 12 | tmp 13 | assets/bundles 14 | assets/thumbnails 15 | assets/audio 16 | .ipynb_checkpoints 17 | rust-scripts/target 18 | rust-scripts/build 19 | esper_jupyter 20 | # Specific files 21 | service-key.json 22 | npm-debug.log 23 | supervisord.conf 24 | video-analysis 25 | .env 26 | 27 | # File kinds 28 | **/db.bk.* 29 | **/yapf*.py 30 | **/*.sql 31 | **/*.bin 32 | **/*.mkv 33 | **/*.mp4 34 | **/*.pkl 35 | **/*_pb2.py 36 | -------------------------------------------------------------------------------- /app/.gitignore: -------------------------------------------------------------------------------- 1 | # Datbuildebug directories 2 | scanner_db 3 | node_modules 4 | data 5 | deps/tmp 6 | google_images 7 | nets 8 | .cache 9 | spark-data 10 | pg 11 | subs 12 | tmp 13 | assets/bundles 14 | assets/thumbnails 15 | assets/audio 16 | .ipynb_checkpoints 17 | rust-scripts/target 18 | rust-scripts/build 19 | 
esper_jupyter 20 | # Specific files 21 | service-key.json 22 | npm-debug.log 23 | supervisord.conf 24 | video-analysis 25 | .env 26 | 27 | # File kinds 28 | **/db.bk.* 29 | **/yapf*.py 30 | **/*.sql 31 | **/*.bin 32 | **/*.mkv 33 | **/*.mp4 34 | **/*.pkl 35 | **/*_pb2.py 36 | -------------------------------------------------------------------------------- /app/Dockerfile.app: -------------------------------------------------------------------------------- 1 | ARG base_name 2 | ARG device=cpu 3 | FROM ${base_name}:${device} 4 | ARG cores=1 5 | ENV DJANGO_CONFIGURATION Docker 6 | ENV TERM=xterm 7 | 8 | # Misc apt dependencies 9 | RUN apt-get update && \ 10 | apt-get install -y cron npm nodejs curl unzip jq gdb psmisc zsh && \ 11 | ln -s /usr/bin/nodejs /usr/bin/node 12 | 13 | # Google Cloud SDK 14 | RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk-xenial main" | \ 15 | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \ 16 | curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \ 17 | apt-get update && apt-get install -y google-cloud-sdk kubectl 18 | 19 | # Python setup 20 | COPY requirements.app.txt ./ 21 | RUN pip3 install -r requirements.app.txt 22 | 23 | # supervisor only works with python2, so have to specially download old pip to install it 24 | RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python get-pip.py && pip install supervisor==3.3.3 25 | 26 | # IPython config 27 | COPY .deps/ipython_config.py /root/.ipython/profile_default/ipython_config.py 28 | RUN mkdir -p /root/.ipython/profile_local/startup 29 | COPY .deps/ipython_config.py /root/.ipython/profile_local/ipython_config.py 30 | COPY .deps/00-load-django.py /root/.ipython/profile_local/startup/00-load-django.py 31 | 32 | # Fix npm hanging on OS X 33 | # https://github.com/npm/npm/issues/7862#issuecomment-220798263 34 | RUN npm config set registry http://registry.npmjs.org && \ 35 | npm config set strict-ssl false 36 | 37 | RUN npm i -g npm n && 
n stable 38 | 39 | # Install npm packages in ~/.local by default so they persist across container restarts 40 | RUN npm config set prefix /root/.local 41 | 42 | # Setup bash helpers 43 | COPY .deps/esper-run .deps/esper-ipython /usr/bin/ 44 | COPY .deps/common.sh /tmp 45 | RUN cat /tmp/common.sh >> /root/.bashrc 46 | 47 | # Fix Google Cloud Storage URL library dependencies 48 | RUN unset PYTHONPATH && pip2 install cryptography 49 | 50 | ENV GLOG_minloglevel 1 51 | ENV GOOGLE_APPLICATION_CREDENTIALS ${APPDIR}/service-key.json 52 | ENV LD_LIBRARY_PATH $LD_LIBRARY_PATH:/usr/local/lib:/usr/local/lib/python3.5/dist-packages/hwang 53 | ENV PYTHONPATH $PYTHONPATH:/app 54 | ENV PYTHONPATH /opt/scannertools:$PYTHONPATH 55 | 56 | CMD cp .scanner.toml /root/ && \ 57 | ./scripts/google-setup.sh && \ 58 | ./scripts/jupyter-setup.sh && \ 59 | supervisord -c supervisord.conf 60 | -------------------------------------------------------------------------------- /app/Dockerfile.base: -------------------------------------------------------------------------------- 1 | ARG tag 2 | FROM scannerresearch/scannertools:${tag}-latest 3 | # ARGS before FROM aren't accessible after the FROM, so we need to replicate the device arg. 
4 | ARG build_tf=off 5 | ARG tf_version=1.12.0 6 | ARG device=cpu 7 | ARG cores=1 8 | 9 | ENV APPDIR=/app 10 | WORKDIR ${APPDIR} 11 | 12 | RUN apt-get update && apt-get install -y default-jre postgresql-9.5 libpq-dev gdb 13 | COPY ./scripts ./scripts 14 | COPY ./requirements.base.txt ./ 15 | RUN ./scripts/build-tf.sh 16 | RUN pip3 install -r requirements.base.txt 17 | 18 | COPY ./.deps/.dummy scannerpatc[h] ./ 19 | RUN if [ -f /app/scannerpatch ]; then \ 20 | cd /opt/scanner && \ 21 | git apply /app/scannerpatch && \ 22 | ./build.sh; \ 23 | fi 24 | 25 | COPY ./.scanner.toml /root/.scanner/config.toml 26 | -------------------------------------------------------------------------------- /app/app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/app/__init__.py -------------------------------------------------------------------------------- /app/app/urls.py: -------------------------------------------------------------------------------- 1 | """esper URL Configuration 2 | 3 | The `urlpatterns` list routes URLs to views. For more information please see: 4 | https://docs.djangoproject.com/en/1.10/topics/http/urls/ 5 | Examples: 6 | Function views 7 | 1. Add an import: from my_app import views 8 | 2. Add a URL to urlpatterns: url(r'^$', views.home, name='home') 9 | Class-based views 10 | 1. Add an import: from other_app.views import Home 11 | 2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home') 12 | Including another URLconf 13 | 1. Import the include() function: from django.conf.urls import url, include 14 | 2. 
Add a URL to urlpatterns: url(r'^blog/', include('blog.urls')) 15 | """ 16 | from django.conf import settings 17 | from django.conf.urls import url, include 18 | from django.contrib import admin 19 | from django.conf.urls.static import static 20 | 21 | urlpatterns = [ 22 | url(r'^', include('query.urls')), 23 | url(r'^admin/', admin.site.urls), 24 | ] 25 | -------------------------------------------------------------------------------- /app/app/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for esper project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/1.10/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "app.settings") 15 | 16 | application = get_wsgi_application() 17 | -------------------------------------------------------------------------------- /app/assets/css/bootstrap.scss: -------------------------------------------------------------------------------- 1 | @import '~bootstrap/dist/css/bootstrap.min.css'; 2 | -------------------------------------------------------------------------------- /app/assets/css/main.scss: -------------------------------------------------------------------------------- 1 | @import '~bootstrap/dist/css/bootstrap.min.css'; 2 | @import '~open-iconic/font/css/open-iconic-bootstrap.css'; 3 | @import '~vgrid/dist/vgrid.css'; 4 | 5 | @import 'search-input'; 6 | @import 'sidebar'; 7 | 8 | body { 9 | background: #fefefe; 10 | font-family: 'Lato', 'Helvetica Neue', Arial, sans-serif; 11 | } 12 | 13 | .login-error { 14 | text-align: center; 15 | font-size: 24px; 16 | margin-top: 30px; 17 | } 18 | 19 | .clearfix { 20 | clear: both; 21 | } 22 | 23 | .search-result { 24 | position: relative; 25 | } 
-------------------------------------------------------------------------------- /app/assets/css/search-input.scss: -------------------------------------------------------------------------------- 1 | .search-input { 2 | $spacing: 10px; 3 | 4 | margin-bottom: 2*$spacing; 5 | 6 | .spinner { 7 | height: 30px; 8 | margin-left: 15px; 9 | } 10 | 11 | .panel { 12 | margin: 0; 13 | } 14 | 15 | .control-label { 16 | font-weight: normal; 17 | margin-right: 5px; 18 | } 19 | 20 | button { 21 | outline: 0!important; 22 | } 23 | 24 | .schema { 25 | margin-top: $spacing; 26 | 27 | .schema-classes { 28 | overflow: auto; 29 | 30 | .schema-class-name { 31 | font-weight: bold; 32 | margin-bottom: 5px; 33 | } 34 | 35 | .schema-class { 36 | float: left; 37 | width: 120px; 38 | margin-right: $spacing; 39 | margin-bottom: $spacing; 40 | } 41 | 42 | .schema-class-fields { 43 | overflow: auto; 44 | height: 120px; 45 | } 46 | 47 | .schema-field { 48 | cursor: pointer; 49 | } 50 | 51 | .panel-body { 52 | padding: 5px 10px; 53 | } 54 | } 55 | 56 | img { 57 | margin-top: $spacing; 58 | } 59 | 60 | .schema-example { 61 | margin-top: $spacing; 62 | 63 | .schema-example-name { 64 | font-weight: bold; 65 | margin-bottom: 5px; 66 | } 67 | } 68 | } 69 | 70 | .example-queries { 71 | margin-top: $spacing; 72 | } 73 | 74 | .ace_editor { 75 | margin-bottom: $spacing; 76 | } 77 | 78 | button { 79 | margin-right: $spacing; 80 | } 81 | 82 | .alert { 83 | margin-top: 10px; 84 | 85 | pre { 86 | border: none; 87 | background: transparent; 88 | padding: 0; 89 | margin: 0; 90 | } 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /app/assets/css/sidebar.scss: -------------------------------------------------------------------------------- 1 | .sidebar { 2 | $width: 150px; 3 | width: $width; 4 | position: absolute; 5 | top: 0; 6 | height: 100%; 7 | 8 | $sidebar-margin: -($width+20px); 9 | 10 | &.left { 11 | left: $sidebar-margin; 12 | } 13 | 14 | &.right { 15 | 
right: $sidebar-margin; 16 | } 17 | 18 | > div { 19 | position: sticky; 20 | top: 20px; 21 | left: 0; 22 | } 23 | 24 | .options { 25 | input[type=range] { 26 | display: inline-block; 27 | width: 40%; 28 | margin-right: 10px; 29 | vertical-align: middle; 30 | } 31 | 32 | input[type=number] { 33 | display: inline-block; 34 | padding: 2px 6px; 35 | width: 40%; 36 | } 37 | } 38 | 39 | .metadata { 40 | .meta-block { 41 | margin-bottom: 10px; 42 | } 43 | 44 | .meta-key { 45 | font-weight: bold; 46 | } 47 | 48 | .colors { 49 | .meta-val > div { 50 | display: inline-block; 51 | margin-right: 20px; 52 | 53 | > div { 54 | display: inline-block; 55 | } 56 | } 57 | } 58 | } 59 | 60 | .help-section { 61 | margin-top: 10px; 62 | } 63 | 64 | .btn { 65 | font-size: 14px; 66 | padding: 4px 6px; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /app/assets/favicon-loading.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/assets/favicon-loading.ico -------------------------------------------------------------------------------- /app/assets/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/assets/favicon.ico -------------------------------------------------------------------------------- /app/assets/images/dan_spinning.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/assets/images/dan_spinning.gif -------------------------------------------------------------------------------- /app/assets/images/haotian_spinning.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/assets/images/haotian_spinning.gif -------------------------------------------------------------------------------- /app/assets/images/james_spinning.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/assets/images/james_spinning.gif -------------------------------------------------------------------------------- /app/assets/images/kayvon_spinning.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/assets/images/kayvon_spinning.gif -------------------------------------------------------------------------------- /app/assets/images/maneesh_spinning.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/assets/images/maneesh_spinning.gif -------------------------------------------------------------------------------- /app/assets/images/spinner.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/assets/images/spinner.gif -------------------------------------------------------------------------------- /app/assets/images/will_spinning.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/assets/images/will_spinning.gif -------------------------------------------------------------------------------- /app/assets/js/Consumer.jsx: 
-------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import {Observer} from 'mobx-react'; 3 | 4 | export default class Consumer extends React.Component { 5 | render() { 6 | let args = []; 7 | return this.props.contexts.reduce( 8 | (acc, ctx) => () => { x => { args.unshift(x); return acc() }}, 9 | () => {() => this.props.children(...args)})(); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /app/assets/js/Provider.jsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | 3 | export default class Provider extends React.Component { 4 | render() { 5 | return this.props.values.reduce( 6 | (inner, [context, value]) => 7 | {inner}, 8 | this.props.children); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /app/assets/js/contexts.jsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | export let SettingsContext = React.createContext({}); 3 | export let DataContext = React.createContext({}); 4 | -------------------------------------------------------------------------------- /app/assets/js/web.jsx: -------------------------------------------------------------------------------- 1 | /* 2 | * web.jsx - Application entrypoint 3 | * 4 | * This file is called when the page is loaded. It initializes the App React view. 
5 | */ 6 | 7 | import axios from 'axios'; 8 | import {observer} from 'mobx-react'; 9 | import React from 'react'; 10 | import ReactDOM from 'react-dom'; 11 | import SearchInput from './SearchInput.jsx'; 12 | import Sidebar from './Sidebar.jsx'; 13 | import VGrid from 'vgrid'; 14 | import {SettingsContext, DataContext} from './contexts'; 15 | import Provider from './Provider.jsx'; 16 | import Consumer from './Consumer.jsx'; 17 | import {observable} from 'mobx'; 18 | 19 | // Make AJAX work with Django's CSRF protection 20 | // https://stackoverflow.com/questions/39254562/csrf-with-django-reactredux-using-axios 21 | axios.defaults.xsrfHeaderName = "X-CSRFToken"; 22 | 23 | @observer 24 | export default class App extends React.Component { 25 | state = { 26 | valid: true, 27 | clickedBox: null, 28 | dataContext: null, 29 | i: 0 30 | } 31 | 32 | constructor() { 33 | super(); 34 | 35 | // Hacky way for us to publicly expose a demo while reducing remote code execution risk. 36 | if (GLOBALS.bucket === 'esper') { 37 | let img = new Image(); 38 | img.onerror = (() => this.setState({valid: false})).bind(this); 39 | img.src = "https://storage.cloud.google.com/esper/do_not_delete.jpg"; 40 | } 41 | 42 | this._settings = observable.map({}); 43 | } 44 | 45 | _onSearch = (results) => { 46 | this.setState({dataContext: results, i: this.state.i + 1}); 47 | } 48 | 49 | _onBoxClick = (box) => { 50 | this.setState({clickedBox: box.id}); 51 | } 52 | 53 | _onSave = (toSave) => { 54 | return axios.post('/api/labeled', toSave); 55 | } 56 | 57 | render() { 58 | if (this.state.valid) { 59 | return ( 60 |
61 |

Esper

62 |
63 | 66 |
67 | 68 | {this.state.dataContext !== null 69 | ? (this.state.dataContext.groups.length > 0 70 | ?
71 | 73 | 74 |
75 | :
No results matching query.
) 76 | : null} 77 |
78 |
79 |
80 |
81 | ); 82 | } else { 83 | return
You must be logged into a validated Google account to access Esper.
84 | } 85 | } 86 | }; 87 | 88 | ReactDOM.render(, document.getElementById('app')); 89 | -------------------------------------------------------------------------------- /app/deps/README.md: -------------------------------------------------------------------------------- 1 | # Installing deps 2 | 3 | ## caption-index 4 | 5 | Probably want to start with step 4 since the files take some time to download. 6 | Also, there is no need to reconfigure or rebuild the containers. 7 | 1. pull and then do `git submodule init` and then `git submodule update` in 8 | the esper repo to get the caption index 9 | 2. in the app container, navigate into caption-index and run 10 | `pip3 install -r requirements.txt` 11 | 3. also in the app container and in the caption-index directory, run 12 | `get_models.sh` 13 | 4. in the `app/data directory`, do 14 | `gsutil cp -r gs://esper/tvnews/caption-index10/index10 .` 15 | 5. after this, there should be an `index10` directory with some files like 16 | `words.lex`, `docs.bin`, etc... 17 | 18 | Note: importing the caption module for the first time takes some time to load the lexicon. <-- may optimize this later to also mmap 19 | 20 | ## rekall 21 | 22 | 1. pull and then do `git submodule init` and then `git submodule update` in 23 | the esper repo to get rekall 24 | 2. 
in the app container, navigate into rekall and run 25 | `pip3 install -r requirements.txt` 26 | 27 | -------------------------------------------------------------------------------- /app/deps/install-rust.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain nightly 3 | source /root/.cargo/env 4 | rustup default nightly 5 | -------------------------------------------------------------------------------- /app/deps/install.old.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | RUN_TESTS=${RUN_TESTS:=0} 4 | 5 | # Fail fast 6 | set -e 7 | 8 | DEPS_DIR=/app/deps 9 | 10 | pushd . 11 | 12 | # Rekall 13 | cd $DEPS_DIR 14 | echo "Installing Rekall" 15 | cd rekall 16 | pip3 install --upgrade --force-reinstall --user -e . 17 | if [ $RUN_TESTS == 1 ]; then 18 | python3 setup.py test 19 | fi 20 | 21 | # Model server 22 | cd $DEPS_DIR 23 | echo "Installing Model-Server" 24 | cd esper-model-server 25 | ./extract_data.sh 26 | pip3 install --user -r requirements.txt 27 | if [ $RUN_TESTS == 1 ]; then 28 | pytest -v tests 29 | fi 30 | 31 | # Caption-Index 32 | cd $DEPS_DIR 33 | echo "Installing Caption-Index" 34 | cd caption-index 35 | rustup update 36 | rustup override set nightly 37 | pip3 install --upgrade --force-reinstall --user . 38 | ./get_models.sh 39 | if [ $RUN_TESTS == 1 ]; then 40 | python3 setup.py test 41 | fi 42 | 43 | # Rs-Embed 44 | cd $DEPS_DIR 45 | echo "Installing Rs-Embed" 46 | cd rs-embed 47 | rustup update 48 | rustup override set nightly 49 | pip3 install --upgrade --force-reinstall --user . 
50 | if [ $RUN_TESTS == 1 ]; then 51 | python3 setup.py test 52 | fi 53 | 54 | cd $DEPS_DIR 55 | echo "Installing vgrid" 56 | #cd vgrid 57 | #npm install 58 | #npm link vgrid 59 | #npm run build 60 | 61 | cd $DEPS_DIR 62 | echo "Installing vgrid_jupyter" 63 | cd vgrid_jupyter/js 64 | npm link vgrid 65 | npm install 66 | npm run build 67 | cd .. 68 | pip3 install --upgrade --force-reinstall --user -e . 69 | 70 | jupyter nbextension enable --py --user widgetsnbextension 71 | jupyter contrib nbextension install --user --skip-running-check 72 | jupyter nbextensions_configurator enable --user 73 | jupyter nbextension enable --user hide_input/main 74 | jupyter nbextension enable --user toc2/main 75 | jupyter nbextension enable --user code_prettify/autopep8 76 | jupyter nbextension enable --user execute_time/ExecuteTime 77 | jupyter nbextension enable --py --user qgrid 78 | 79 | jupyter nbextension install vgrid_jupyter --py --symlink --user --overwrite 80 | jupyter nbextension enable vgrid_jupyter --py --user 81 | 82 | cd /app 83 | npm link vgrid 84 | npm install 85 | npm run build 86 | 87 | popd 88 | 89 | echo "SUCCESS! All dependencies installed" 90 | -------------------------------------------------------------------------------- /app/deps/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | RUN_TESTS=${RUN_TESTS:=0} 4 | 5 | # Fail fast 6 | set -e 7 | 8 | DEPS_DIR=/app/deps 9 | 10 | pushd . 11 | 12 | # Rekall 13 | cd $DEPS_DIR 14 | echo "Installing Rekall" 15 | cd rekall 16 | pip3 install --upgrade --force-reinstall --user -e . 
17 | if [ $RUN_TESTS == 1 ]; then 18 | python3 setup.py test 19 | fi 20 | 21 | # Model server 22 | cd $DEPS_DIR 23 | echo "Installing Model-Server" 24 | cd esper-model-server 25 | ./extract_data.sh 26 | pip3 install --user -r requirements.txt 27 | if [ $RUN_TESTS == 1 ]; then 28 | pytest -v tests 29 | fi 30 | 31 | # Caption-Index 32 | cd $DEPS_DIR 33 | echo "Installing Caption-Index" 34 | cd caption-index 35 | rustup update 36 | rustup override set nightly 37 | pip3 install --upgrade --force-reinstall --user . 38 | ./get_models.sh 39 | if [ $RUN_TESTS == 1 ]; then 40 | python3 setup.py test 41 | fi 42 | 43 | # Rs-Embed 44 | cd $DEPS_DIR 45 | echo "Installing Rs-Embed" 46 | cd rs-embed 47 | rustup update 48 | rustup override set nightly 49 | pip3 install --upgrade --force-reinstall --user . 50 | if [ $RUN_TESTS == 1 ]; then 51 | python3 setup.py test 52 | fi 53 | 54 | cd $DEPS_DIR 55 | echo "Installing vgrid" 56 | cd vgrid 57 | npm install 58 | npm link 59 | npm run build 60 | 61 | cd $DEPS_DIR 62 | echo "Installing vgrid_jupyter" 63 | cd vgrid_jupyter/js 64 | npm link vgrid 65 | npm install 66 | npm run build 67 | cd .. 68 | pip3 install --upgrade --force-reinstall --user -e . 69 | 70 | jupyter nbextension enable --py --user widgetsnbextension 71 | jupyter contrib nbextension install --user --skip-running-check 72 | jupyter nbextensions_configurator enable --user 73 | jupyter nbextension enable --user hide_input/main 74 | jupyter nbextension enable --user toc2/main 75 | jupyter nbextension enable --user code_prettify/autopep8 76 | jupyter nbextension enable --user execute_time/ExecuteTime 77 | jupyter nbextension enable --py --user qgrid 78 | 79 | jupyter nbextension install vgrid_jupyter --py --symlink --user --overwrite 80 | jupyter nbextension enable vgrid_jupyter --py --user 81 | 82 | cd /app 83 | npm link vgrid 84 | npm install 85 | npm run build 86 | 87 | popd 88 | 89 | echo "SUCCESS! 
All dependencies installed" 90 | -------------------------------------------------------------------------------- /app/esper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/esper/__init__.py -------------------------------------------------------------------------------- /app/esper/blackframes.py: -------------------------------------------------------------------------------- 1 | from query.models import Video 2 | from scannertools import shot_detection, Pipeline 3 | from esper.scannerutil import ScannerWrapper 4 | from scannerpy import register_python_op 5 | from scannerpy.stdlib import readers 6 | import struct 7 | from typing import Sequence 8 | from esper.kube import make_cluster, cluster_config, worker_config 9 | from esper.prelude import pcache, par_for 10 | import numpy as np 11 | import os 12 | 13 | @register_python_op(name='IsBlack', batch=10000) 14 | def is_black(config, hists: Sequence[bytes]) -> Sequence[bytes]: 15 | output = [] 16 | for hists_batch in hists: 17 | h = readers.histograms(hists_batch, config.protobufs) 18 | threshold = 0.99 * sum(h[0]) 19 | is_black = h[0][0] > threshold and h[1][0] > threshold and h[2][0] > threshold 20 | output.append(struct.pack('B', 1 if is_black else 0)) 21 | return output 22 | 23 | class BlackFramesPipeline(Pipeline): 24 | job_suffix = 'blackframes' 25 | base_sources = ['videos', 'histograms'] 26 | parser_fn = lambda _: lambda buf, _: struct.unpack('B', buf) 27 | 28 | def build_pipeline(self): 29 | return { 30 | 'blackframes': self._db.ops.IsBlack(hists=self._sources['histograms'].op) 31 | } 32 | 33 | compute_black_frames = BlackFramesPipeline.make_runner() 34 | 35 | if __name__ == "__main__": 36 | videos = list(Video.objects.filter(threeyears_dataset=False).order_by('id')) 37 | 38 | cfg = cluster_config( 39 | num_workers=100, 40 | 
worker=worker_config('n1-standard-32')) 41 | # with make_cluster(cfg, no_start=True, no_delete=True) as db_wrapper: 42 | 43 | if True: 44 | db_wrapper = ScannerWrapper.create() 45 | 46 | db = db_wrapper.db 47 | hists = shot_detection.compute_histograms( 48 | db, 49 | videos=[v.for_scannertools() for v in videos], 50 | run_opts={ 51 | 'io_packet_size': 10000, 52 | 'work_packet_size': 1000 53 | }) 54 | 55 | bfs = compute_black_frames( 56 | db, 57 | videos=[v.for_scannertools() for v in videos], 58 | histograms=hists, 59 | run_opts={ 60 | 'io_packet_size': 100000, 61 | 'work_packet_size': 10000 62 | }) 63 | 64 | def load_bf(i): 65 | path = '/app/data/blackframes/{:07d}.bin'.format(i) 66 | if os.path.isfile(path): 67 | return 68 | 69 | try: 70 | with open(path, 'wb') as f: 71 | f.write(np.array(list(bfs[i].load()), dtype=np.uint8).tobytes()) 72 | except Exception: 73 | print(i) 74 | 75 | print('Loading...') 76 | par_for(load_bf, list(range(len(bfs))), workers=8) 77 | -------------------------------------------------------------------------------- /app/esper/blurriness.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | import socket 3 | import scannerpy 4 | import cv2 5 | 6 | @scannerpy.register_python_op(name='Blurriness') 7 | def blurriness(config, frame: scannerpy.FrameType, bboxes: bytes) -> bytes: 8 | bboxes = json.loads(bboxes.decode('utf-8')) 9 | results = [] 10 | for bbox in bboxes: 11 | img = frame[int(bbox['y1']):int(bbox['y2']), 12 | int(bbox['x1']):int(bbox['x2']), :] 13 | if img.shape[0] == 0 or img.shape[1] == 0: 14 | continue 15 | img = cv2.resize(img, (200, 200)) 16 | results.append({ 17 | 'id': bbox['id'], 18 | 'blurriness': cv2.Laplacian(img, cv2.CV_64F).var() 19 | }) 20 | 21 | return json.dumps(results).encode() 22 | 23 | 24 | db = make_scanner_db(multiworker=True) 25 | 26 | videos = Video.objects.all()[20000:] 27 | 28 | sql_config = db.protobufs.SQLConfig( 29 | adapter='postgres', 
30 | hostaddr=socket.gethostbyname('db'), 31 | port=5432, 32 | dbname='esper', 33 | user=os.environ['DJANGO_DB_USER'], 34 | password=os.environ['DJANGO_DB_PASSWORD']) 35 | sql_query = db.protobufs.SQLQuery( 36 | fields=''' 37 | query_tvnews_face.id as id, 38 | (query_tvnews_face.bbox_x1 * query_tvnews_video.width) as x1, 39 | (query_tvnews_face.bbox_y1 * query_tvnews_video.height) as y1, 40 | (query_tvnews_face.bbox_x2 * query_tvnews_video.width) as x2, 41 | (query_tvnews_face.bbox_y2 * query_tvnews_video.height) as y2''', 42 | table='query_tvnews_face', 43 | joins=''' 44 | INNER JOIN "query_tvnews_person" ON ("query_tvnews_face"."person_id" = "query_tvnews_person"."id") 45 | INNER JOIN "query_tvnews_frame" ON ("query_tvnews_person"."frame_id" = "query_tvnews_frame"."id") 46 | INNER JOIN "query_tvnews_video" ON ("query_tvnews_frame"."video_id" = "query_tvnews_video"."id") 47 | ''', 48 | id='query_tvnews_face.id', 49 | group='query_tvnews_frame.number', 50 | job_table='query_tvnews_scannerjob' 51 | ) 52 | 53 | frame = db.sources.FrameColumn() 54 | frame_sampled = frame.sample() 55 | bboxes = db.sources.SQL(config=sql_config, query=sql_query) 56 | blurriness = db.ops.Blurriness(frame=frame_sampled, bboxes=bboxes) 57 | output = db.sinks.SQL(config=sql_config, query=sql_query, input=blurriness) 58 | 59 | log.debug('Fetching indices') 60 | def fetch_indices(v): 61 | return [f['person__frame__number'] 62 | for f in Face.objects.filter(person__frame__video=v).distinct('person__frame__number') \ 63 | .order_by('person__frame__number').values('person__frame__number')] 64 | frame_indices = par_for(fetch_indices, videos, workers=8) 65 | 66 | log.debug('Making jobs') 67 | jobs = [ 68 | Job(op_args={ 69 | frame: db.table(v.path).column('frame'), 70 | frame_sampled: db.sampler.gather(f), 71 | bboxes: {'filter': 'query_tvnews_video.id = {}'.format(v.id)}, 72 | output: {'job_name': v.path + '_blurriness'} 73 | }) 74 | for v, f in zip(videos, frame_indices) 75 | ] 76 | 77 | 
log.debug('Running job') 78 | db.run(output, jobs, pipeline_instances_per_node=8) 79 | -------------------------------------------------------------------------------- /app/esper/download/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | WORKDIR /app 3 | RUN apt update && apt install -y python3 python3-pip curl 4 | RUN pip3 install tqdm celery[redis] internetarchive 5 | # Google Cloud SDK 6 | COPY service-key.json . 7 | COPY ia.ini /root/.config/ia.ini 8 | ENV GOOGLE_APPLICATION_CREDENTIALS /app/service-key.json 9 | RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk-xenial main" | \ 10 | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \ 11 | curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \ 12 | apt-get update && apt-get install -y google-cloud-sdk kubectl && \ 13 | gcloud config set project visualdb-1046 && \ 14 | gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS 15 | COPY tasks.py . 
16 | CMD celery -A tasks worker -c 20 -------------------------------------------------------------------------------- /app/esper/download/count.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | redis-cli -h 10.0.0.3 -p 6379 -n 0 llen celery 3 | -------------------------------------------------------------------------------- /app/esper/download/deploy.yml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: archive-download 5 | spec: 6 | replicas: 50 7 | template: 8 | metadata: 9 | labels: 10 | app: archive-download 11 | spec: 12 | containers: 13 | - name: archive-download 14 | image: gcr.io/visualdb-1046/archive-download 15 | imagePullPolicy: Always 16 | resources: 17 | requests: 18 | cpu: 1.1 19 | -------------------------------------------------------------------------------- /app/esper/download/start-cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | gcloud container clusters create archive-download \ 4 | --zone us-east1-b --machine-type "n1-standard-2" --disk-size "100" --num-nodes 50 \ 5 | --network redis --enable-autoscaling --min-nodes 1 --max-nodes 50 \ 6 | --enable-ip-alias 7 | -------------------------------------------------------------------------------- /app/esper/download/submit.py: -------------------------------------------------------------------------------- 1 | from tasks import download 2 | from tqdm import tqdm 3 | 4 | remaining = [s.strip() for s in open('remaining-videos.txt', 'r').readlines()] 5 | 6 | for video in tqdm(remaining): 7 | download.delay(video) 8 | -------------------------------------------------------------------------------- /app/esper/download/tasks.py: -------------------------------------------------------------------------------- 1 | from celery import Celery 2 | from tqdm import tqdm 3 | import os 4 | 
import subprocess as sp 5 | 6 | app = Celery('archive-download', broker='redis://10.0.0.3:6379/0') 7 | 8 | @app.task(acks_late=True) 9 | def download(video): 10 | print('Downloading {}'.format(video)) 11 | 12 | try: 13 | sp.check_call('ia download {} --glob "*.mp4"'.format(video), shell=True) 14 | sp.check_call('gsutil mv {id}/{id}.mp4 gs://esper/tvnews/videos/{id}.mp4'.format(id=video), shell=True) 15 | except Exception: 16 | print('Error: {}'.format(video)) 17 | -------------------------------------------------------------------------------- /app/esper/face_detection.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import Timer, Notifier, pcache 2 | from query.base_models import model_defaults 3 | from query.models import Video, Frame, Face, Labeler 4 | from scannertools import kube, face_detection 5 | from esper.kube import make_cluster, cluster_config, worker_config 6 | from esper.scanner_bench import ScannerJobConfig, bench 7 | from esper.scannerutil import ScannerWrapper, ScannerSQLPipeline, ScannerSQLTable 8 | import scannerpy 9 | from scannerpy.stdlib import readers 10 | import json 11 | from django.db.models.fields import NOT_PROVIDED 12 | from tqdm import tqdm 13 | 14 | labeler_id = Labeler.objects.get(name='mtcnn').id 15 | face_defaults = model_defaults(Face) 16 | 17 | @scannerpy.register_python_op(name='FacesToJson') 18 | def faces_to_json(config, bboxes: bytes, frame_ids: bytes) -> bytes: 19 | faces = readers.bboxes(bboxes, config.protobufs) 20 | frame_id = json.loads(frame_ids.decode('utf-8'))[0]['id'] 21 | return json.dumps([ 22 | {'frame_id': frame_id, 23 | 'bbox_x1': f.x1, 24 | 'bbox_x2': f.x2, 25 | 'bbox_y1': f.y1, 26 | 'bbox_y2': f.y2, 27 | 'probability': f.score, 28 | 'labeler_id': labeler_id, 29 | **face_defaults} 30 | for f in faces 31 | ]) 32 | 33 | class FaceDetectionPipeline(ScannerSQLPipeline, face_detection.FaceDetectionPipeline): 34 | db_class = Face 35 | json_kernel = 
'FacesToJson' 36 | additional_sources = ['frame_ids'] 37 | 38 | def build_pipeline(self): 39 | output_ops = super(FaceDetectionPipeline, self).build_pipeline() 40 | output_ops['frame_ids'] = self._sources['frame_ids'].op 41 | return output_ops 42 | 43 | detect_faces = FaceDetectionPipeline.make_runner() 44 | 45 | videos = list(Video.objects.filter(threeyears_dataset=True).order_by('id')) 46 | 47 | if False: 48 | with Timer('benchmark'): 49 | videos = videos[:50] 50 | def run_pipeline(db, videos, frames, **kwargs): 51 | return face_detection.detect_faces(db, videos=[v.for_scannertools() for v in videos], frames=frames, cache=False, **kwargs) 52 | 53 | cfg = cluster_config(num_workers=5, worker=worker_config('n1-standard-32')) 54 | configs = [(cfg, [ScannerJobConfig(io_packet_size=1000, work_packet_size=20, batch=1)])] 55 | bench('face', {'videos': videos, 'frames': [[f['number'] for f in Frame.objects.filter(video=v).values('number').order_by('number')] for v in videos]}, 56 | run_pipeline, configs, no_delete=True, force=True) 57 | 58 | 59 | with Timer('run'): 60 | 61 | print('Getting frames') 62 | def load_frames(): 63 | return [[f['number'] for f in Frame.objects.filter(video=v, shot_boundary=False).values('number').order_by('number')] 64 | for v in tqdm(videos)] 65 | frames = pcache.get('face_frames', load_frames) 66 | 67 | cfg = cluster_config( 68 | num_workers=100, 69 | worker=worker_config('n1-standard-64'), 70 | num_load_workers=2, 71 | num_save_workers=2) 72 | with make_cluster(cfg, sql_pool=4, no_delete=True) as db_wrapper: 73 | 74 | # if True: 75 | # db_wrapper = ScannerWrapper.create(enable_watchdog=False) 76 | 77 | db = db_wrapper.db 78 | 79 | print('Starting detection') 80 | detect_faces( 81 | db, 82 | videos=[v.for_scannertools() for v in videos], 83 | db_videos=videos, 84 | frames=frames, 85 | frame_ids=[ScannerSQLTable(Frame, v, num_elements=len(f), 86 | filter='query_frame.shot_boundary = false') 87 | for v, f in zip(videos, frames)], 88 | 
run_opts={ 89 | 'io_packet_size': 1000, 90 | 'work_packet_size': 20, 91 | 'pipeline_instances_per_node': 16, 92 | 'checkpoint_frequency': 1000 93 | }) 94 | -------------------------------------------------------------------------------- /app/esper/face_embeddings.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from rs_embed import EmbeddingData 4 | 5 | EMB_DIR = '/app/data/embs' 6 | EMB_PATH = os.path.join(EMB_DIR, 'face_embs.bin') 7 | ID_PATH = os.path.join(EMB_DIR, 'face_ids.bin') 8 | EMB_DIM = 128 9 | 10 | 11 | _EMB_DATA = EmbeddingData(ID_PATH, EMB_PATH, EMB_DIM) 12 | 13 | 14 | def count(): 15 | return _EMB_DATA.count() 16 | 17 | 18 | def ids(i, n): 19 | """Get n face ids starting at index i""" 20 | return _EMB_DATA.ids(i, n) 21 | 22 | 23 | def get(ids): 24 | """List of face ids -> List of pairs (id, embedding)""" 25 | return _EMB_DATA.get(ids) 26 | 27 | 28 | def mean(ids): 29 | """List of face ids -> mean embedding""" 30 | return _EMB_DATA.mean(ids) 31 | 32 | 33 | def features(ids): 34 | """List of face ids -> List of embeddings""" 35 | result = _EMB_DATA.get(ids) 36 | assert len(result) == len(ids) 37 | return [np.array(v) for _, v in result] 38 | 39 | 40 | def sample(k): 41 | """Returns list of face_ids, uniformly random with replacement""" 42 | return _EMB_DATA.sample(k) 43 | 44 | 45 | def exists(ids): 46 | """List of face ids -> List of bools""" 47 | return _EMB_DATA.exists(ids) 48 | 49 | 50 | def dist(ids, targets=None, target_ids=None): 51 | """ 52 | Computes the distance from each face in ids to the closest target 53 | 54 | Args: 55 | ids: List of faces to compute distances for 56 | targets: List of embeddings 57 | target_ids: List of face_ids 58 | 59 | Returns: 60 | List of distances in same order as as ids 61 | """ 62 | if targets is not None: 63 | targets = [ 64 | [float(z) for z in x.tolist()] 65 | if not isinstance(x, list) else x for x in targets 66 | ] 67 | return 
_EMB_DATA.dist(targets, ids) 68 | elif target_ids is not None: 69 | return _EMB_DATA.dist_by_id(target_ids, ids) 70 | else: 71 | raise ValueError('No targets given') 72 | 73 | 74 | def knn(targets=None, ids=None, k=2 ** 31, max_threshold=100., **kwargs): 75 | """ 76 | Computes distance of all faces to the targets 77 | (specified by targets or ids) 78 | 79 | Args: 80 | targets: List of embeddings (i.e., list of floats) 81 | ids: List of face ids (another way to specify targets) 82 | max_threshold: largest distance 83 | 84 | Returns: 85 | List of (face_id, distance) pairs by ascending distance 86 | """ 87 | if targets is not None: 88 | targets = [ 89 | [float(z) for z in x.tolist()] 90 | if not isinstance(x, list) else x for x in targets 91 | ] 92 | return _EMB_DATA.nn(targets, k, max_threshold, **kwargs) 93 | elif ids is not None: 94 | return _EMB_DATA.nn_by_id(ids, k, max_threshold, **kwargs) 95 | else: 96 | raise ValueError('No targets given') 97 | 98 | 99 | def kmeans(ids, k=25): 100 | """ 101 | Run kmeans on all face_ids in ids. 
102 | 103 | Args: 104 | ids: List of face_ids 105 | 106 | Returns: 107 | List of (face_id, cluster number) pairs 108 | """ 109 | return _EMB_DATA.kmeans(ids, k) 110 | 111 | 112 | def logreg(ids, labels, **kwargs): 113 | """ 114 | Args: 115 | ids: List of face_ids 116 | labels: List of 0, 1 labels 117 | Returns: 118 | weights 119 | """ 120 | return _EMB_DATA.logreg( 121 | ids, labels, **kwargs) 122 | 123 | 124 | def logreg_predict(weights, **kwargs): 125 | """Returns: List of (face_id, score) pairs by ascending score)""" 126 | return _EMB_DATA.logreg_predict(weights, **kwargs) 127 | 128 | 129 | def knn_predict(train_ids, train_labels, k, **kwargs): 130 | """Returns: List of (face_id, score) pairs by ascending score)""" 131 | return _EMB_DATA.knn_predict(train_ids, train_labels, k, **kwargs) -------------------------------------------------------------------------------- /app/esper/frame_export.py: -------------------------------------------------------------------------------- 1 | from query.scripts.script_util import * 2 | import requests 3 | import math 4 | 5 | videos = Video.objects.all() 6 | for video in videos: 7 | stride = int(math.ceil(video.fps)/2) 8 | print((video.path, video.fps, stride)) 9 | ids = [ 10 | str(f['id']) 11 | for f in Frame.objects.filter(video=video, number__in=list(range(0, video.num_frames, stride))) 12 | .order_by('number').values('id') 13 | ] 14 | requests.post('http://localhost:8000/batch_fallback', data={'frames': ','.join(ids)}) 15 | -------------------------------------------------------------------------------- /app/esper/identity_detect.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from esper.tvnews.embed_kernel import EmbedFaceKernel 3 | from sklearn.neighbors import NearestNeighbors 4 | from scannerpy.stdlib import writers 5 | import cv2 6 | 7 | LABELER, _ = Labeler.objects.get_or_create(name='facenet') 8 | FEATURE_DISTANCE_THRESHOLD = 1.0 9 | 10 | 11 | # 
Simple K-NN based identity detector 12 | def identity_detect(videos, exemplar, features, force=False): 13 | log.debug('Loading features') 14 | ids, vectors = unzip([((i, j, k), f) 15 | for i, vid_features in tqdm(enumerate(features)) 16 | for j, frame in enumerate(vid_features) for k, f in enumerate(frame)]) 17 | 18 | log.debug('Building k-nn tree') 19 | feat_nn = NearestNeighbors().fit(np.vstack(vectors)) 20 | 21 | log.debug('Computing exemplar features') 22 | 23 | def compute_exemplar(): 24 | img = cv2.imread(exemplar) 25 | with Database() as db: 26 | bboxes = [db.protobufs.BoundingBox(x1=0, y1=0, x2=img.shape[1], y2=img.shape[0])] 27 | kernel = EmbedFaceKernel(None, db.protobufs) 28 | [emb] = kernel.execute( 29 | [cv2.cvtColor(img, cv2.COLOR_RGB2BGR), 30 | writers.bboxes([bboxes], db.protobufs)[0]]) 31 | return np.frombuffer(emb, dtype=np.float32) 32 | 33 | exemplar_vector = pcache.get('exemplar', compute_exemplar, method='pickle', force=force) 34 | 35 | log.debug('Doing lookup') 36 | dists, id_indices = feat_nn.kneighbors([exemplar_vector], len(vectors)) 37 | 38 | face_map = defaultdict(list) 39 | for q, (dist, id_idx) in enumerate(zip(dists[0], id_indices[0])): 40 | (i, j, k) = ids[id_idx] 41 | if dist > FEATURE_DISTANCE_THRESHOLD: 42 | break 43 | 44 | face_map[i].append((j, k)) 45 | 46 | return [face_map[i] for i in range(len(videos))] 47 | 48 | # return unzip([unzip(face_map[video.id]) for video in videos]) 49 | -------------------------------------------------------------------------------- /app/esper/ingest_test.py: -------------------------------------------------------------------------------- 1 | from query.scripts.script_util import * 2 | import json 3 | import math 4 | from django.db import transaction 5 | 6 | def parse(path): 7 | with open(path, 'r') as f: 8 | while True: 9 | path = f.next()[:-1] # this will raise StopIteration for us when we reach EOF 10 | num_rows = int(math.ceil(int(next(f)) / 24.0)) 11 | print((path, num_rows)) 12 | yield path, 
[f.next()[:-1] for _ in range(num_rows)] 13 | 14 | to_ingest = [ 15 | # ('assets/kcam_tiny_faces.txt', 'tinyfaces'), 16 | # ('assets/kcam_cpm_faces.txt', 'cpm'), 17 | ('assets/kcam_frcnn_people.txt', 'frcnn') 18 | ] # yapf: disable 19 | 20 | for fpath, labeler_name in to_ingest: 21 | print(fpath) 22 | 23 | labeler, _ = Labeler.objects.get_or_create(name=labeler_name) 24 | bar = progress_bar(len(list(parse(fpath)))) 25 | does_not_exist = [] 26 | 27 | for vi, (path, rows) in enumerate(parse(fpath)): 28 | try: 29 | video = Video.objects.get(path__contains=path) 30 | except Video.DoesNotExist: 31 | does_not_exist.append(path) 32 | continue 33 | 34 | video_boxes = { 35 | j: [ 36 | proto.BoundingBox(x1=r[0], y1=r[1], x2=r[2], y2=r[3]) 37 | for r in [[float(s) for s in box.split(' ')] for box in l.split(',')[:-1]] 38 | ] 39 | for j, l in enumerate(rows) if l != '' 40 | } 41 | 42 | Instance = PersonInstance if labeler_name == 'frcnn' else FaceInstance 43 | 44 | frames = list( 45 | Frame.objects.filter(video=video).order_by('number').extra( 46 | where=['number mod 24=0'])) 47 | faces = [ 48 | Instance(labeler=labeler, frame=frames[j], bbox=bbox) 49 | for j, frame_boxes in list(video_boxes.items()) for bbox in frame_boxes 50 | ] 51 | 52 | with transaction.atomic(): 53 | for face in faces: 54 | f = Face() 55 | f.save() 56 | face.concept = f 57 | 58 | Instance.objects.bulk_create(faces) 59 | 60 | bar.update(vi) 61 | 62 | print(('Failed to find: {}'.format(json.dumps(does_not_exist)))) 63 | -------------------------------------------------------------------------------- /app/esper/ingest_tvnews.py: -------------------------------------------------------------------------------- 1 | import subprocess as sp 2 | 3 | with open('/app/all-videos.txt', 'r') as f: 4 | all_videos = [s.strip() for s in f.readlines()] 5 | 6 | downloaded = [s.strip().split('/')[-1][:-4] for s in sp.check_output('gsutil ls "gs://esper/tvnews/videos/*.mp4"', shell=True).decode('utf-8').splitlines()] 7 | 8 | 
remaining = set(all_videos) - set(downloaded) 9 | 10 | with open('/app/remaining-videos.txt', 'w') as f: 11 | f.write('\n'.join(list(remaining))) 12 | -------------------------------------------------------------------------------- /app/esper/kube.py: -------------------------------------------------------------------------------- 1 | from scannertools import kube 2 | from esper.scannerutil import ScannerWrapper 3 | import os 4 | import shlex 5 | from contextlib import contextmanager 6 | 7 | def image_path(device): 8 | return 'gcr.io/{project}/{base_name}:{device}'.format( 9 | project=os.environ['GOOGLE_PROJECT'], 10 | base_name=os.environ['BASE_IMAGE_NAME'], 11 | device=device) 12 | 13 | cloud_config = kube.CloudConfig(project=os.environ['GOOGLE_PROJECT']) 14 | 15 | master_config = kube.MachineConfig( 16 | image=image_path('cpu'), 17 | type=kube.MachineTypeName(name='n1-highmem-32'), 18 | disk=250) 19 | 20 | def worker_config(machine_type, **kwargs): 21 | return kube.MachineConfig( 22 | image=image_path('gpu' if 'gpu' in kwargs else 'cpu'), 23 | type=kube.MachineTypeName(name=machine_type), 24 | disk=250, 25 | preemptible=True, 26 | **kwargs) 27 | 28 | def cluster_config(**kwargs): 29 | return kube.ClusterConfig( 30 | id='wc-test', 31 | autoscale=True, 32 | master=master_config, 33 | **kwargs) 34 | 35 | def cluster(cluster_config, sql_pool=None, **kwargs): 36 | containers = [] 37 | if sql_pool is not None: 38 | proxy_port = 5431 39 | containers.append({ 40 | 'name': 'dbpool', 41 | 'image': 'edoburu/pgbouncer', 42 | 'env': [ 43 | {'name': 'DATABASE_URL', 'value': 'postgres://{}:{}@0.0.0.0:5431/esper'.format( 44 | os.environ['DJANGO_DB_USER'], os.environ['DJANGO_DB_PASSWORD'] 45 | )}, 46 | {'name': 'DEFAULT_POOL_SIZE', 'value': str(sql_pool)}, 47 | {'name': 'MAX_CLIENT_CONN', 'value': '1000'}, # Allow any num. 
of client connections 48 | {'name': 'QUERY_WAIT_TIMEOUT', 'value': '0'}, # Don't timeout long queries 49 | ] 50 | }) 51 | else: 52 | proxy_port = 5432 53 | 54 | containers.append({ 55 | 'name': 'db', 56 | 'image': 'gcr.io/cloudsql-docker/gce-proxy:1.09', 57 | 'command': shlex.split( 58 | '/cloud_sql_proxy -instances={}:us-east1-d:esper-dev=tcp:0.0.0.0:{} -credential_file=/config/service-key.json' \ 59 | .format(os.environ['GOOGLE_PROJECT'], proxy_port)), 60 | 'volumeMounts': [{ 61 | 'name': 'service-key', 62 | 'mountPath': '/config' 63 | }] 64 | }) 65 | 66 | return kube.Cluster(cloud_config, cluster_config, containers=containers, **kwargs) 67 | 68 | @contextmanager 69 | def make_cluster(*args, **kwargs): 70 | with cluster(*args, **kwargs) as c: 71 | yield ScannerWrapper.create(cluster=c, enable_watchdog=False) 72 | 73 | if __name__ == '__main__': 74 | cluster(cluster_config(num_workers=1, worker=worker_config('n1-standard-16'))).cli() 75 | -------------------------------------------------------------------------------- /app/esper/load_aligned_transcript.py: -------------------------------------------------------------------------------- 1 | from scannertools.transcript_alignment import TranscriptAligner 2 | import pickle 3 | import os 4 | import sys 5 | 6 | def callback(name, outputs): 7 | video_name = name[:name.find('.mp4')].split('/')[2] 8 | path = name.replace('_align_transcript', '') 9 | 10 | align_dir = '/app/data/subs/aligned/' 11 | align_word_list = [] 12 | num_word_aligned = 0 13 | num_word_total = 0 14 | for seg_idx, seg_bytes in enumerate(outputs): 15 | seg = pickle.loads(seg_bytes) 16 | align_word_list += seg['align_word_list'] 17 | num_word_aligned += seg['num_word_aligned'] 18 | if 'num_word_total' in seg: 19 | num_word_total += seg['num_word_total'] 20 | else: 21 | num_word_total += len(seg['align_word_list']) 22 | 23 | output_path = os.path.join(align_dir, '{}.{}.srt'.format(video_name, 'word')) 24 | 
TranscriptAligner.dump_aligned_transcript_byword(align_word_list, output_path) 25 | 26 | if num_word_total == 0: 27 | print(path, 1) 28 | else: 29 | print(path, 1 - 1. * num_word_aligned / num_word_total) 30 | sys.stdout.flush() 31 | 32 | -------------------------------------------------------------------------------- /app/esper/major_canonical_shows.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from esper.widget import * 3 | from query.models import * 4 | 5 | from datetime import timedelta 6 | 7 | 8 | NUM_MAJOR_CANONICAL_SHOWS = 150 9 | 10 | MAJOR_CANONICAL_SHOWS = [ 11 | x['show__canonical_show__name'] for x in 12 | Video.objects.values( 13 | 'show__canonical_show__name' 14 | ).annotate( 15 | total_duration=Sum( 16 | ExpressionWrapper( 17 | F('num_frames') / F('fps'), 18 | output_field=FloatField())) 19 | ).order_by( 20 | '-total_duration' 21 | ).values( 22 | 'show__canonical_show__name' 23 | )[:NUM_MAJOR_CANONICAL_SHOWS] 24 | ] 25 | 26 | # Cache this 27 | _TOTAL_SHOT_TIME_BY_CSHOW = None 28 | 29 | 30 | def get_total_shot_time_by_canonical_show(): 31 | global _TOTAL_SHOT_TIME_BY_CSHOW 32 | if _TOTAL_SHOT_TIME_BY_CSHOW is None: 33 | query_results = Shot.objects.filter( 34 | video__show__canonical_show__name__in=MAJOR_CANONICAL_SHOWS, 35 | in_commercial=False, 36 | ).values( 37 | 'video__show__canonical_show__name' 38 | ).annotate( 39 | screen_time=Sum((F('max_frame') - F('min_frame')) / F('video__fps'), 40 | output_field=FloatField()) 41 | ) 42 | _TOTAL_SHOT_TIME_BY_CSHOW = { 43 | x['video__show__canonical_show__name']: 44 | timedelta(seconds=x['screen_time']) for x in query_results 45 | } 46 | return _TOTAL_SHOT_TIME_BY_CSHOW 47 | -------------------------------------------------------------------------------- /app/esper/object_detection.py: -------------------------------------------------------------------------------- 1 | from scannertools import object_detection 2 | from esper.prelude 
import ScannerWrapper, Timer, ScannerSQLTable 3 | from query.models import Object, Video, Frame, ScannerJob 4 | import scannerpy 5 | from scannerpy.stdlib import readers 6 | import json 7 | 8 | 9 | # 1. Define pipeline + auxiliary kernels 10 | 11 | @scannerpy.register_python_op(name='BboxToJson') 12 | def bbox_to_json(config, bboxes: bytes, frame_id: bytes) -> bytes: 13 | bboxes = readers.bboxes(bboxes, config.protobufs) 14 | frame_id = json.loads(frame_id.decode('utf-8'))[0]['id'] 15 | return json.dumps([{ 16 | 'bbox_x1': bb.x1, 17 | 'bbox_x2': bb.x2, 18 | 'bbox_y1': bb.y1, 19 | 'bbox_y2': bb.y2, 20 | 'probability': bb.score, 21 | 'label': bb.label, 22 | 'frame_id': frame_id 23 | } for bb in bboxes]) 24 | 25 | class ObjectDetectionPipeline(object_detection.ObjectDetectionPipeline): 26 | additional_sources = ['frame_ids'] 27 | 28 | def build_sink(self, db_videos): 29 | jsonified = self._db.ops.BboxToJson( 30 | bboxes=self._output_ops['bboxes'], frame_id=self._sources['frame_ids'].op) 31 | return ScannerWrapper(self._db).sql_sink( 32 | cls=Object, input=jsonified, videos=db_videos, suffix='objdet', insert=True) 33 | 34 | def parse_output(self): 35 | pass 36 | 37 | detect_objects = ObjectDetectionPipeline.make_runner() 38 | 39 | 40 | # 2. Gather inputs 41 | 42 | db_wrapper = ScannerWrapper.create() 43 | db = db_wrapper.db 44 | 45 | videos = db_wrapper.filter_videos(Video.objects.all(), ObjectDetectionPipeline)[:1] 46 | print('Processing {} videos'.format(len(videos))) 47 | 48 | frames = [ 49 | [f['number'] for f in 50 | Frame.objects.filter(video=v).values('number').order_by('number')] 51 | for v in videos 52 | ] 53 | 54 | 55 | # 3. 
Run pipeline 56 | 57 | detect_objects( 58 | db, 59 | videos=[v.for_scannertools() for v in videos], 60 | frames=frames, 61 | frame_ids=[ScannerSQLTable(Frame, v) for v in videos], 62 | db_videos=videos) 63 | 64 | print('Done!') 65 | -------------------------------------------------------------------------------- /app/esper/queries/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/esper/queries/__init__.py -------------------------------------------------------------------------------- /app/esper/queries/all_faces.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from .queries import query 3 | 4 | @query("All faces") 5 | def all_faces(): 6 | from query.models import Face 7 | from esper.widget import qs_to_result 8 | return qs_to_result(Face.objects.all(), stride=1000) 9 | 10 | 11 | @query("All clothing") 12 | def all_clothing(): 13 | from query.models import Clothing 14 | from esper.widget import qs_to_result 15 | return qs_to_result(Clothing.objects.all(), stride=1000) 16 | 17 | 18 | @query("All haircolor") 19 | def all_haircolor(): 20 | from query.models import HairColor 21 | from esper.widget import qs_to_result 22 | return qs_to_result(HairColor.objects.all(), stride=1000) 23 | 24 | 25 | @query("All hairlength") 26 | def all_hairlength(): 27 | from query.models import HairLength 28 | from esper.widget import qs_to_result 29 | return qs_to_result(HairLength.objects.all(), stride=1000) 30 | 31 | 32 | @query("White hair, not bald") 33 | def white_hair_not_bald(): 34 | from query.models import HairColor, HairLength 35 | from esper.widget import qs_to_result 36 | white_hair_ids = { 37 | x['face__id'] for x in 38 | HairColor.objects.filter(color__name='white').values('face__id')[0:1000000:1000] 39 | } 40 | bald_ids = { 41 | x['face__id'] for x in 
HairLength.objects.filter( 42 | length__name='bald', face__id__in=white_hair_ids 43 | ).values('face__id') 44 | } 45 | return qs_to_result(Face.objects.filter(id__in=white_hair_ids - bald_ids)) 46 | 47 | 48 | @query("Face tags") 49 | def face_tags(): 50 | from query.models import FaceTag 51 | from esper.widget import qs_to_result 52 | return qs_to_result(FaceTag.objects.filter( 53 | labeler__name='race:black:labeler=james', 54 | score__gt=0.5)) -------------------------------------------------------------------------------- /app/esper/queries/all_videos.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from .queries import query 3 | 4 | @query("All videos") 5 | def all_videos(): 6 | from query.models import Video 7 | from esper.widget import qs_to_result 8 | return qs_to_result(Video.objects.all()) 9 | -------------------------------------------------------------------------------- /app/esper/queries/interview_with_person_x.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from .queries import query 3 | 4 | @query('Interview with person X (rekall)') 5 | def interview_with_person_x(): 6 | from query.models import LabeledCommercial, FaceIdentity 7 | from rekall.video_interval_collection import VideoIntervalCollection 8 | from rekall.temporal_predicates import before, after, overlaps 9 | from rekall.logical_predicates import or_pred 10 | from esper.rekall import intrvllists_to_result 11 | 12 | # Get list of sandbox video IDs 13 | sandbox_videos = [ 14 | row.video_id 15 | for row in LabeledCommercial.objects.distinct('video_id') 16 | ] 17 | 18 | TWENTY_SECONDS = 600 19 | FORTY_FIVE_SECONDS = 1350 20 | EPSILON = 10 21 | 22 | guest_name = "bernie sanders" 23 | 24 | # Load hosts and instances of guest from SQL 25 | identities = FaceIdentity.objects.filter(face__shot__video_id__in=sandbox_videos) 26 | hosts_qs = 
identities.filter(face__is_host=True) 27 | guest_qs = identities.filter(identity__name=guest_name).filter(probability__gt=0.7) 28 | 29 | # Put bounding boxes in SQL 30 | hosts = VideoIntervalCollection.from_django_qs( 31 | hosts_qs.annotate(video_id=F("face__shot__video_id"), 32 | min_frame=F("face__shot__min_frame"), 33 | max_frame=F("face__shot__max_frame")) 34 | ) 35 | guest = VideoIntervalCollection.from_django_qs( 36 | guest_qs.annotate(video_id=F("face__shot__video_id"), 37 | min_frame=F("face__shot__min_frame"), 38 | max_frame=F("face__shot__max_frame")) 39 | ) 40 | 41 | # Get all shots where the guest and a host are on screen together 42 | guest_with_host = guest.overlaps(hosts).coalesce() 43 | 44 | # This temporal predicate defines A overlaps with B, or A before by less than 10 frames, 45 | # or A after B by less than 10 frames 46 | overlaps_before_or_after_pred = or_pred( 47 | or_pred(overlaps(), before(max_dist=EPSILON), arity=2), 48 | after(max_dist=EPSILON), arity=2) 49 | 50 | # This code finds sequences of: 51 | # guest with host overlaps/before/after host OR 52 | # guest with host overlaps/before/after guest 53 | interview_candidates = guest_with_host \ 54 | .merge(hosts, predicate=overlaps_before_or_after_pred) \ 55 | .set_union(guest_with_host.merge( 56 | guest, predicate=overlaps_before_or_after_pred)) \ 57 | .coalesce() 58 | 59 | # Sequences may be interrupted by shots where the guest or host don't 60 | # appear, so dilate and coalesce to merge neighboring segments 61 | interviews = interview_candidates \ 62 | .dilate(TWENTY_SECONDS) \ 63 | .coalesce() \ 64 | .dilate(-1 * TWENTY_SECONDS) \ 65 | .filter_length(min_length=FORTY_FIVE_SECONDS) 66 | 67 | # Return intervals 68 | return intrvllists_to_result(interviews.get_allintervals()) 69 | -------------------------------------------------------------------------------- /app/esper/queries/panels_rekall.py: -------------------------------------------------------------------------------- 1 | from 
esper.prelude import * 2 | from .queries import query 3 | 4 | @query('Panels (rekall)') 5 | def panels_rekall(): 6 | from query.models import LabeledCommercial, Face 7 | from rekall.video_interval_collection import VideoIntervalCollection 8 | from rekall.parsers import in_array, bbox_payload_parser 9 | from rekall.merge_ops import payload_plus 10 | from rekall.bbox_predicates import height_at_least, same_value, left_of 11 | from rekall.spatial_predicates import scene_graph 12 | from rekall.payload_predicates import payload_satisfies 13 | from esper.rekall import intrvllists_to_result_bbox 14 | 15 | MIN_FACE_HEIGHT = 0.3 16 | EPSILON = 0.05 17 | 18 | # Get list of sandbox video IDs 19 | sandbox_videos = [ 20 | row.video_id 21 | for row in LabeledCommercial.objects.distinct('video_id') 22 | ] 23 | 24 | faces_qs = Face.objects.filter(shot__video_id__in=sandbox_videos).annotate( 25 | video_id=F("shot__video_id"), 26 | min_frame=F("shot__min_frame"), 27 | max_frame=F("shot__max_frame") 28 | ) 29 | 30 | # One interval for each face 31 | faces = VideoIntervalCollection.from_django_qs( 32 | faces_qs, 33 | with_payload=in_array( 34 | bbox_payload_parser( 35 | VideoIntervalCollection.django_accessor))) 36 | 37 | # Merge shots 38 | faces = faces.coalesce(payload_merge_op=payload_plus) 39 | 40 | # Define a scene graph for things that look like panels 41 | three_faces_scene_graph = { 42 | 'nodes': [ 43 | { 'name': 'face1', 'predicates': [ height_at_least(MIN_FACE_HEIGHT) ] }, 44 | { 'name': 'face2', 'predicates': [ height_at_least(MIN_FACE_HEIGHT) ] }, 45 | { 'name': 'face3', 'predicates': [ height_at_least(MIN_FACE_HEIGHT) ] } 46 | ], 47 | 'edges': [ 48 | { 'start': 'face1', 'end': 'face2', 49 | 'predicates': [ same_value('y1', epsilon=EPSILON), left_of() ] }, 50 | { 'start': 'face2', 'end': 'face3', 51 | 'predicates': [ same_value('y1', epsilon=EPSILON), left_of() ] }, 52 | ] 53 | } 54 | 55 | panels = faces.filter(payload_satisfies( 56 | scene_graph(three_faces_scene_graph, 
exact=True) 57 | )) 58 | 59 | return intrvllists_to_result_bbox(panels.get_allintervals()) 60 | 61 | -------------------------------------------------------------------------------- /app/esper/queries/panels_sql.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from .queries import query 3 | 4 | def panels(): 5 | from query.base_models import BoundingBox 6 | from query.models import Labeler, Face, Frame 7 | from esper.widget import qs_to_result 8 | from django.db.models import OuterRef, Count, IntegerField 9 | 10 | mtcnn = Labeler.objects.get(name='mtcnn') 11 | face_qs = Face.objects.annotate(height=BoundingBox.height_expr()).filter( 12 | height__gte=0.25, labeler=mtcnn, shot__in_commercial=False) 13 | frames = Frame.objects.annotate(c=Subquery( 14 | face_qs.filter(frame=OuterRef('pk')) \ 15 | .values('frame') \ 16 | .annotate(c=Count('*')) \ 17 | .values('c'), IntegerField())) \ 18 | .filter(c__gte=3, c__lte=3).order_by('id') 19 | 20 | output_frames = [] 21 | for frame in frames[:10000:10]: 22 | faces = list(face_qs.filter(frame=frame)) 23 | y = faces[0].bbox_y1 24 | valid = True 25 | for i in range(1, len(faces)): 26 | if abs(faces[i].bbox_y1 - y) > 0.05: 27 | valid = False 28 | break 29 | if valid: 30 | output_frames.append((frame, faces)) 31 | 32 | return output_frames 33 | 34 | 35 | @query("Panels (SQL)") 36 | def panels_(): 37 | from esper.queries.panels_sql import panels 38 | return simple_result([{ 39 | 'video': frame.video.id, 40 | 'min_frame': frame.number, 41 | 'objects': [bbox_to_dict(f) for f in faces] 42 | } for (frame, faces) in panels()], 'Frame') 43 | -------------------------------------------------------------------------------- /app/esper/queries/queries.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from collections import defaultdict 3 | from functools import reduce 4 | import inspect 5 | import os 6 
queries = []


def query(name):
    """Decorator that registers a function as a named demo query.

    The decorated function's source (from its `def` line onward, plus a
    trailing `FN = <name>` assignment) is appended to the module-level
    `queries` list as [name, source] so the web UI can display and
    re-execute it. The function itself is returned unchanged.
    """

    def wrapper(f):
        lines = inspect.getsource(f).split('\n')
        lines = lines[:-1]  # getsource includes a trailing newline

        # Skip decorator lines to find the start of the function itself.
        # (Previously searched for the literal "():", which breaks for any
        # query function that takes arguments; matching the `def` keyword
        # is robust and gives the same result for zero-arg functions.)
        i = 0
        while not lines[i].lstrip().startswith('def '):
            i = i + 1

        fn = lines[i:]
        fn += ['FN = ' + f.__name__]
        queries.append([name, '\n'.join(fn)])

        return f

    return wrapper

from .all_faces import *
from .all_videos import *
from .panels_sql import *
from .panels_rekall import *
from .interview_with_person_x import *
from .sandbox_labels import *
from .other_queries import *

# ---- app/esper/queries/sandbox_labels.py ----
from esper.prelude import *
from .queries import query

@query('Hand-labeled Interviews (Sandbox)')
def handlabeled_interviews():
    from query.models import LabeledInterview
    from esper.rekall import qs_to_intrvllists, intrvllists_to_result

    # start/end are multiplied by fps, i.e. converted to frame numbers.
    interviews = LabeledInterview.objects \
        .annotate(fps=F('video__fps')) \
        .annotate(min_frame=F('fps') * F('start')) \
        .annotate(max_frame=F('fps') * F('end'))

    return intrvllists_to_result(qs_to_intrvllists(interviews))

@query('Hand-labeled Panels (Sandbox)')
def handlabeled_panels():
    from query.models import LabeledPanel
    from esper.rekall import qs_to_intrvllists, intrvllists_to_result

    panels = LabeledPanel.objects \
        .annotate(fps=F('video__fps')) \
        .annotate(min_frame=F('fps') * F('start')) \
        .annotate(max_frame=F('fps') * F('end'))

    return intrvllists_to_result(qs_to_intrvllists(panels))

@query('Hand-labeled Commercials (Sandbox)') 29 | def handlabeled_commercials(): 30 | from query.models import LabeledCommercial 31 | from esper.rekall import qs_to_intrvllists, intrvllists_to_result 32 | 33 | commercials = LabeledCommercial.objects \ 34 | .annotate(fps=F('video__fps')) \ 35 | .annotate(min_frame=F('fps') * F('start')) \ 36 | .annotate(max_frame=F('fps') * F('end')) 37 | 38 | return intrvllists_to_result(qs_to_intrvllists(commercials)) 39 | 40 | @query('Multiple Timelines (Sandbox)') 41 | def multiple_timelines(): 42 | from query.models import LabeledInterview, LabeledPanel, LabeledCommercial 43 | from esper.rekall import qs_to_intrvllists, intrvllists_to_result, add_intrvllists_to_result 44 | 45 | interviews = LabeledInterview.objects \ 46 | .annotate(fps=F('video__fps')) \ 47 | .annotate(min_frame=F('fps') * F('start')) \ 48 | .annotate(max_frame=F('fps') * F('end')) 49 | panels = LabeledPanel.objects \ 50 | .annotate(fps=F('video__fps')) \ 51 | .annotate(min_frame=F('fps') * F('start')) \ 52 | .annotate(max_frame=F('fps') * F('end')) 53 | commercials = LabeledCommercial.objects \ 54 | .annotate(fps=F('video__fps')) \ 55 | .annotate(min_frame=F('fps') * F('start')) \ 56 | .annotate(max_frame=F('fps') * F('end')) 57 | 58 | result = intrvllists_to_result(qs_to_intrvllists(interviews)) 59 | add_intrvllists_to_result(result, qs_to_intrvllists(panels), color="blue") 60 | add_intrvllists_to_result(result, qs_to_intrvllists(commercials), color="purple") 61 | return result 62 | -------------------------------------------------------------------------------- /app/esper/rekall_vgrid.py: -------------------------------------------------------------------------------- 1 | """Extensions to rekall.vgrid_utils especialized to Esper Database Schema. 2 | 3 | Tracks: 4 | CaptionTrack: Add caption to the VBlocks. Works in either VideoVBlocks or 5 | IntervalVBlocks mode. 
6 | """ 7 | 8 | from rekall.interval_set_3d import IntervalSet3D, Interval3D 9 | from rekall.vgrid_utils.vblocks_builder import build_interval, DrawType_Caption 10 | import esper.captions 11 | 12 | class CaptionTrack: 13 | """Track for adding captions to vblocks. 14 | 15 | Works with either VideoVBlocksBuilder or IntervalVBlocksBuilder. 16 | 17 | Example of using with VideoVBlockBuilder: 18 | 19 | # face_collection is a DomainIntervalCollection of face intervals. 20 | 21 | # We want to see a list of VBlocks where each is a video in the collection 22 | # We want to see two tracks in each VBlock: one showing all faces in the 23 | # collection with bounding boxes drawn and a flag metadata set; another 24 | # showing all captions. 25 | 26 | json = VideoVBlocksBuilder()\\ 27 | .add_track( 28 | VideoTrackBuilder('faces', face_collection)\\ 29 | .set_draw_type(DrawType_Bbox())\\ 30 | .add_metadata('flag', Metadata_Flag()))\\ 31 | .add_track(CaptionTrack())\\ 32 | .build() 33 | """ 34 | def __init__(self): 35 | self.name = 'caption' 36 | self.video_ids = set([]) 37 | self._cache = {} 38 | 39 | def build_for_video(self, video_id): 40 | if video_id in self._cache: 41 | return self._cache[video_id] 42 | ret = self._get_captions_for_video(video_id) 43 | self._cache[video_id] = ret 44 | return ret 45 | 46 | def build_for_interval(self, video_id, interval): 47 | return self.build_for_video(video_id) 48 | 49 | def _get_captions_for_video(self, video_id): 50 | """Returns the JSON intervals with the captions 51 | 52 | Note: 53 | The returned intervals use seconds on temporal dimension instead 54 | of frame number 55 | """ 56 | subs = esper.captions.get_json(video_id) 57 | 58 | output = [] 59 | for sub in subs: 60 | interval = Interval3D( 61 | (sub['startTime'], sub['endTime']), 62 | payload=sub['text']) 63 | output.append(build_interval(video_id, interval, 64 | DrawType_Caption(), {})) 65 | return output 66 | 
-------------------------------------------------------------------------------- /app/esper/report.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from matplotlib.patches import Patch 3 | from matplotlib.lines import Line2D 4 | from scipy.stats import linregress 5 | import statsmodels.api as sm 6 | 7 | MALE_COLOR = 'tab:blue' 8 | FEMALE_COLOR = 'tab:red' 9 | MARKER_SIZE = 50 10 | 11 | 12 | def align(col, all_dfs): 13 | all_cols = reduce(lambda x, y: x & y, [set(list(df[col])) for df in all_dfs]) 14 | 15 | main_df = all_dfs[0][all_dfs[0][col].isin(all_cols)].sort_values(by=['M%']).reset_index( 16 | drop=True).reset_index() 17 | 18 | def _align(df): 19 | return df[df[col].isin(all_cols)].set_index(col).reindex( 20 | main_df[col]).reset_index().reset_index() 21 | 22 | return [main_df] + [_align(df) for df in all_dfs[1:]] 23 | 24 | 25 | def screen_speak_scatter(screen_df, screen_nh_df, speak_df, speak_nh_df, col, title, plots): 26 | fig = plt.figure() 27 | ax = fig.add_subplot(111) 28 | 29 | ax.axhline(50, color='black', linestyle='--') 30 | 31 | if 'screen' in plots: 32 | screen_df.plot('index', 'M%', ax=ax, color=MALE_COLOR, kind='scatter', marker='s', s=MARKER_SIZE) 33 | screen_df.plot('index', 'F%', ax=ax, color=FEMALE_COLOR, kind='scatter', marker='s', s=MARKER_SIZE) 34 | 35 | if len(plots) == 1: 36 | pairs = list(zip(screen_df['M%'].tolist(), screen_df['F%'].tolist())) 37 | c = matplotlib.collections.LineCollection( 38 | [((i, a), (i, b)) for (i, (a, b)) in enumerate(pairs)], 39 | colors=[MALE_COLOR if a > b else FEMALE_COLOR for (a, b) in pairs], 40 | linewidths=[3 for _ in range(len(pairs))]) 41 | ax.add_collection(c) 42 | 43 | if 'screen_nh' in plots: 44 | screen_nh_df.plot('index', 'M%', ax=ax, color=MALE_COLOR, kind='scatter', marker='x', s=MARKER_SIZE) 45 | screen_nh_df.plot('index', 'F%', ax=ax, color=FEMALE_COLOR, kind='scatter', marker='x', s=MARKER_SIZE) 46 | 47 | # 
print(model.summary()) 48 | # n = len(screen_nh_df.index) 49 | # [intercept, slope] = model.params 50 | # X = screen_df['M%'].tolist() 51 | 52 | # ax.scatter(range(len(X)), [intercept + slope * x for x in X], color='green') 53 | # ax.axhline(np.mean(screen_nh_df['M%']), color='black', linestyle='--') 54 | 55 | # slope, intercept, r, p, _3 = linregress(screen_nh_df.index.tolist(), 56 | # screen_nh_df['M%'].tolist()) 57 | # ax.plot([0, n], [intercept, intercept + slope * n], color='black') 58 | # print(r, p) 59 | 60 | if 'speak' in plots: 61 | speak_df.plot('index', 'M%', ax=ax, color=MALE_COLOR, kind='scatter', marker='^') 62 | speak_df.plot('index', 'F%', ax=ax, color=FEMALE_COLOR, kind='scatter', marker='^') 63 | 64 | if 'speak_nh' in plots: 65 | # speak_nh_df.plot('index', 'M%', ax=ax, color='tab:orange', kind='scatter', marker='x') 66 | pass 67 | 68 | ax.set_ylim(0, 100) 69 | ax.set_ylabel('Percentage of time') 70 | ax.set_xlabel('') 71 | ax.set_xticks(range(len(screen_df[col]))) 72 | ax.set_xticklabels(screen_df[col], rotation=45, horizontalalignment='right') 73 | ax.tick_params(labelsize='large') 74 | 75 | legends = { 76 | 'screen': ['Screen time - male', 'Screen time - female'], 77 | 'screen_nh': ['Screen time (no host) - male', 'Screen time (no host) - female'], 78 | 'speak': ['Speaking time - male', 'Speaking time - female'], 79 | 'speak_nh': ['Speaking time (no host)'] 80 | } 81 | 82 | ax.legend(['50%'] + flatten([legends[p] for p in plots])) 83 | plt.title(title) 84 | plt.tight_layout() 85 | -------------------------------------------------------------------------------- /app/esper/spacy_tokenize.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from esper.widget import * 3 | import spacy 4 | import pysrt 5 | import itertools 6 | from spacy.attrs import ORTH, LEMMA 7 | import esper.datatypes_pb2 as datatypes 8 | import multiprocessing as mp 9 | 10 | SUB_CACHE_DIR = '/app/data/subs' 11 
| 12 | nlp = spacy.load('en', disable=['parser', 'ner']) 13 | nlp.tokenizer.add_special_case( 14 | '>>>', 15 | [{ORTH: ">>>", LEMMA: ">>>"}]) 16 | nlp.tokenizer.add_special_case( 17 | '>>', 18 | [{ORTH: ">>", LEMMA: ">>"}]) 19 | 20 | videos = list(Video.objects.all()) 21 | 22 | def load_transcript(video): 23 | if video.srt_extension == '': 24 | return None 25 | 26 | path = '/app/data/subs/orig/{}.{}.srt'.format(video.item_name(), video.srt_extension) 27 | 28 | # TODO(wcrichto): small subset of documents are failing with utf8 decode errors 29 | try: 30 | subs = pysrt.from_string(open(path, 'rb').read().decode('utf-8')) 31 | except Exception: 32 | print(video.path) 33 | return None 34 | 35 | # In practice, seems like subs are usually about 5 seconds late, so this is a hand-tuned shift 36 | subs.shift(seconds=-5) 37 | 38 | return subs 39 | 40 | def time_to_float(t): 41 | return t.hours * 3600 + t.minutes * 60 + t.seconds 42 | 43 | def pos_from_str(s): 44 | exceptions = { 45 | '-LRB-': 'LRB', 46 | '-RRB-': 'RRB', 47 | ',': 'COMMA', 48 | ':': 'COLON', 49 | '.': 'PERIOD', 50 | '\'\'': 'SINGLEQUOTE', 51 | '""': 'DOUBLEQUOTE', 52 | '#': 'POUND', 53 | '``': 'BACKTICK', 54 | '$': 'DOLLAR', 55 | 'PRP$': 'PRPD', 56 | '_SP': 'SP', 57 | 'WP$': 'WPD' 58 | } 59 | 60 | try: 61 | return getattr(datatypes.Document, exceptions[s] if s in exceptions else s) 62 | except AttributeError: 63 | # Sometimes spacy returns '' for token.tag_, not sure why? 
XX is "unknown" so best guess here 64 | return datatypes.Document.XX 65 | 66 | def do_tokenize(video): 67 | flat_path = '{}/flat/{}.txt'.format(SUB_CACHE_DIR, video.item_name()) 68 | meta_path = '{}/meta/{}.bin'.format(SUB_CACHE_DIR, video.item_name()) 69 | if os.path.isfile(meta_path): 70 | return 71 | 72 | subs = load_transcript(video) 73 | if subs is None: 74 | return 75 | 76 | # Create/invoke a generator to tokenize the subtitle text 77 | texts = [sub.text.encode('ascii', 'ignore').decode('utf-8') for sub in subs] 78 | # NB: we have to remove unicode characters for now since Spacy tokens only track the word index, not 79 | # byte index of the token, so there's no easy way to figure out the byte offset of an arbitrary token w/ 80 | # unicode chars > 1 byte. 81 | all_tokens = list(nlp.pipe(texts, batch_size=10000, n_threads=mp.cpu_count())) 82 | 83 | # Convert tokens into Protobuf 84 | cursor = 0 85 | doc = datatypes.Document() 86 | full_text = '' 87 | for (sub, text, tokens) in zip(subs, texts, all_tokens): 88 | for tok in tokens: 89 | word = doc.words.add() 90 | word.char_start = cursor + tok.idx 91 | word.char_end = word.char_start + len(tok.text) 92 | word.time_start = time_to_float(sub.start) 93 | word.time_end = time_to_float(sub.end) 94 | word.pos = pos_from_str(tok.tag_) 95 | word.lemma = tok.lemma_ 96 | full_text += text + ' ' 97 | cursor += len(text) + 1 98 | 99 | # Write flattened transcript as text file 100 | with open(flat_path, 'w') as f: 101 | f.write(full_text) 102 | 103 | # Write proto metadata 104 | with open(meta_path, 'wb') as f: 105 | f.write(doc.SerializeToString()) 106 | 107 | par_for(do_tokenize, videos, workers=12) 108 | -------------------------------------------------------------------------------- /app/esper/sql_dump.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from esper.spark import * 3 | import django.apps 4 | import os 5 | 6 | models = [m._meta.db_table for 
m in django.apps.apps.get_models(include_auto_created=True)] 7 | 8 | with Timer('Exporting models'): 9 | def export_model(model): 10 | try: 11 | sp.check_call("/app/scripts/export-table.sh {}".format(model), shell=True) 12 | except Exception: 13 | import traceback 14 | print(model) 15 | traceback.print_exc() 16 | par_for(export_model, models, workers=8) 17 | 18 | with Timer('Ingest into Spark'): 19 | def transfer_model_spark(model): 20 | if os.path.exists('/app/data/pg/{}.csv'.format(model)): 21 | df = spark.load_csv('/app/data/pg/{}.csv'.format(model)) 22 | spark.save(model, df) 23 | par_for(transfer_model_spark, models, workers=8) 24 | 25 | # with Timer('Ingest into BigQuery'): 26 | # sp.check_call('bq rm -r -f tvnews && bq mk tvnews', shell=True) 27 | # def transfer_model_bq(model): 28 | # try: 29 | # sp.check_call("/app/scripts/transfer-to-bigquery.sh {}".format(model), shell=True) 30 | # except Exception: 31 | # import traceback 32 | # print(model) 33 | # traceback.print_exc() 34 | # par_for(transfer_model_bq, models, workers=8) 35 | -------------------------------------------------------------------------------- /app/esper/update_show.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | 3 | for video in Video.objects.all(): 4 | parts = os.path.splitext(os.path.split(video.path)[1])[0].split('_') 5 | [channel, date, time] = parts[:3] 6 | dt = datetime.datetime.strptime('{} {}'.format(date, time), '%Y%m%d %H%M%S') 7 | if channel[-1] == 'W': 8 | channel = channel[:-1] 9 | 10 | show = ' '.join(parts[3:-1] if parts[-1] == 'segment' else parts[3:]) 11 | 12 | video.time = dt 13 | # video.channel = Channel.objects.get_or_create(name=channel)[0] 14 | # video.show = Show.objects.get_or_create(name=show)[0] 15 | video.save() 16 | -------------------------------------------------------------------------------- /app/esper/update_tracks.py: 
-------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | 3 | tracks = list(PersonTrack.objects.filter(video__videotag__tag__name='pose-test')) 4 | for i, track in enumerate(tracks): 5 | print(i) 6 | faces = list(Face.objects.filter(person__tracks=track).select_related('person__frame')) 7 | if len(faces) == 0: 8 | continue 9 | track.min_frame = min([f.person.frame.number for f in faces]) 10 | track.max_frame = max([f.person.frame.number for f in faces]) 11 | PersonTrack.objects.bulk_update(tracks) 12 | -------------------------------------------------------------------------------- /app/esper/write_video.py: -------------------------------------------------------------------------------- 1 | from query.scripts.script_util import * 2 | from scannerpy.stdlib import writers 3 | from collections import defaultdict 4 | 5 | with Database() as db: 6 | video = Video.objects.all()[0] 7 | faces = FaceInstance.objects.filter(frame__video=video).order_by('frame__number').values('bbox', 'frame__number') 8 | t = db.table(video.path) 9 | 10 | # frames = defaultdict(list) 11 | # for face in faces: 12 | # bbox = face['bbox'] 13 | # bbox.x1 *= video.width 14 | # bbox.x2 *= video.width 15 | # bbox.y1 *= video.height 16 | # bbox.y2 *= video.height 17 | # frames[face['frame__number']].append(bbox) 18 | 19 | # N = t.num_rows() 20 | # all_bboxes = [[] for _ in range(N)] 21 | 22 | # for frame, bboxes in frames.iteritems(): 23 | # for i in range(frame, min(frame+24, N)): 24 | # all_bboxes[i] = bboxes 25 | 26 | # bb_t = db.new_table('test', ['bboxes'], [[t] for t in all_bboxes], fn=writers.bboxes, force=True) 27 | # print bb_t.num_rows() 28 | 29 | bb_t = db.table('test') 30 | 31 | frame = t.as_op().all() 32 | bboxes = bb_t.as_op().all() 33 | out_frame = db.ops.DrawBox(frame=frame, bboxes=bboxes) 34 | job = Job(columns=[out_frame], name='test2') 35 | out_table = db.run(job, force=True) 36 | out_table.column('frame').save_mp4('faces') 
37 | -------------------------------------------------------------------------------- /app/gunicorn_conf.py: -------------------------------------------------------------------------------- 1 | bind = '0.0.0.0:8000' 2 | loglevel = 'debug' 3 | errorlog = '-' 4 | accesslog = '-' 5 | timeout = 0 6 | workers = 1 7 | -------------------------------------------------------------------------------- /app/manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | if __name__ == "__main__": 6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "app.settings") 7 | try: 8 | from django.core.management import execute_from_command_line 9 | except ImportError: 10 | # The above import may fail for some other reason. Ensure that the 11 | # issue is really that Django is missing to avoid masking other 12 | # exceptions on Python 2. 13 | try: 14 | import django 15 | except ImportError: 16 | raise ImportError( 17 | "Couldn't import Django. Are you sure it's installed and " 18 | "available on your PYTHONPATH environment variable? Did you " 19 | "forget to activate a virtual environment?" 
20 | ) 21 | raise 22 | execute_from_command_line(sys.argv) 23 | -------------------------------------------------------------------------------- /app/notebooks/duplicate_videos.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2019-01-04T03:57:16.981067Z", 9 | "start_time": "2019-01-04T03:57:16.340575Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "from esper.prelude import *" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "ExecuteTime": { 22 | "end_time": "2019-01-04T03:58:34.416540Z", 23 | "start_time": "2019-01-04T03:58:28.009468Z" 24 | } 25 | }, 26 | "outputs": [], 27 | "source": [] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "ExecuteTime": { 34 | "end_time": "2019-01-04T03:58:49.183148Z", 35 | "start_time": "2019-01-04T03:58:48.969612Z" 36 | } 37 | }, 38 | "outputs": [], 39 | "source": [] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "ExecuteTime": { 46 | "end_time": "2019-01-04T04:05:25.137187Z", 47 | "start_time": "2019-01-04T04:05:25.043349Z" 48 | } 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "Video.objects.filter(duplicate=False).count()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "ExecuteTime": { 60 | "end_time": "2019-01-04T04:04:39.154696Z", 61 | "start_time": "2019-01-04T04:02:37.122592Z" 62 | } 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "for channel in Channel.objects.all():\n", 67 | " videos = list(Video.objects.filter(channel=channel).order_by('time'))\n", 68 | " by_time = collect(videos, lambda v: v.time)\n", 69 | " for k, vs in by_time.items():\n", 70 | " if len(vs) > 1:\n", 71 | " for v in vs[1:]:\n", 72 | " v.duplicate = True\n", 73 | " 
v.save()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "ExecuteTime": { 81 | "end_time": "2019-01-04T04:01:22.106396Z", 82 | "start_time": "2019-01-04T04:01:22.038049Z" 83 | } 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "pprint([v.item_name() for v in Video.objects.filter(channel__name='CNN', time__year=2012, time__month=3, time__day=11).order_by('time')])" 88 | ] 89 | } 90 | ], 91 | "metadata": { 92 | "kernelspec": { 93 | "display_name": "Django Shell-Plus", 94 | "language": "python", 95 | "name": "django_extensions" 96 | }, 97 | "language_info": { 98 | "codemirror_mode": { 99 | "name": "ipython", 100 | "version": 3 101 | }, 102 | "file_extension": ".py", 103 | "mimetype": "text/x-python", 104 | "name": "python", 105 | "nbconvert_exporter": "python", 106 | "pygments_lexer": "ipython3", 107 | "version": "3.5.2" 108 | }, 109 | "toc": { 110 | "nav_menu": {}, 111 | "number_sections": true, 112 | "sideBar": true, 113 | "skip_h1_title": false, 114 | "toc_cell": false, 115 | "toc_position": {}, 116 | "toc_section_display": "block", 117 | "toc_window_display": false 118 | } 119 | }, 120 | "nbformat": 4, 121 | "nbformat_minor": 2 122 | } 123 | -------------------------------------------------------------------------------- /app/notebooks/example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from query.datasets.prelude import *" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "print('Number of frames: {}'.format(Frame.objects.all().count())) \n", 19 | "print('Number of faces: {}'.format(Face.objects.all().count())) \n", 20 | "print('Number of poses: {}'.format(Pose.objects.all().count()))" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | 
"execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "# TODO(wcrichto): more examples here" 30 | ] 31 | } 32 | ], 33 | "metadata": { 34 | "kernelspec": { 35 | "display_name": "Django Shell-Plus", 36 | "language": "python", 37 | "name": "django_extensions" 38 | }, 39 | "language_info": { 40 | "codemirror_mode": { 41 | "name": "ipython", 42 | "version": 2 43 | }, 44 | "file_extension": ".py", 45 | "mimetype": "text/x-python", 46 | "name": "python", 47 | "nbconvert_exporter": "python", 48 | "pygments_lexer": "ipython2", 49 | "version": "2.7.14" 50 | } 51 | }, 52 | "nbformat": 4, 53 | "nbformat_minor": 2 54 | } 55 | -------------------------------------------------------------------------------- /app/notebooks/export_face_bboxes_for_aws.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2019-08-08T23:32:43.002377Z", 9 | "start_time": "2019-08-08T23:32:42.973091Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "import os\n", 15 | "import json\n", 16 | "from collections import defaultdict\n", 17 | "from tqdm import tqdm\n", 18 | "\n", 19 | "OUT_DIR = '/app/data/aws_face_bboxes_new'\n", 20 | "\n", 21 | "def get_video_name(s):\n", 22 | " return os.path.splitext(s.split('/')[-1])[0]\n", 23 | "\n", 24 | "frame_3s_sampler = FrameSampler.objects.get(name='3s')\n", 25 | "face_labeler = Labeler.objects.get(name='mtcnn:july-25-2019')" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "ExecuteTime": { 33 | "start_time": "2019-08-08T23:32:44.081Z" 34 | } 35 | }, 36 | "outputs": [ 37 | { 38 | "name": "stderr", 39 | "output_type": "stream", 40 | "text": [ 41 | " 48%|█████████████████████████▍ | 123581/257031 [49:38<19:34, 113.60it/s]" 42 | ] 43 | } 44 | ], 45 | "source": [ 46 | "if not os.path.exists(OUT_DIR):\n", 47 | " 
os.makedirs(OUT_DIR)\n", 48 | "\n", 49 | "for video in tqdm(Video.objects.all().order_by('id')):\n", 50 | " video_name = get_video_name(video.path)\n", 51 | " frame_to_faces = defaultdict(list)\n", 52 | " for f in Face.objects.filter(\n", 53 | " frame__video=video, frame__sampler=frame_3s_sampler, labeler=face_labeler\n", 54 | " ).values('id', 'frame__number', 'bbox_x1', 'bbox_x2', 'bbox_y1', 'bbox_y2'):\n", 55 | " frame_to_faces[f['frame__number']].append({\n", 56 | " 'id': f['id'], 'bbox': {\n", 57 | " 'x1': f['bbox_x1'], 'x2': f['bbox_x2'], 'y1': f['bbox_y1'], 'y2': f['bbox_y2']\n", 58 | " }\n", 59 | " })\n", 60 | " video_out_file = os.path.join(OUT_DIR, video_name + '.faces.json')\n", 61 | " with open(video_out_file, 'w') as f:\n", 62 | " json.dump(list(frame_to_faces.items()), f)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "ExecuteTime": { 70 | "start_time": "2019-08-09T00:22:32.815Z" 71 | } 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "video_and_fps = [(v.id, get_video_name(v.path), v.fps) for v in Video.objects.all()]" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "ExecuteTime": { 83 | "start_time": "2019-08-09T00:22:33.167Z" 84 | } 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "with open('/app/data/videos_and_fps.json', 'w') as f:\n", 89 | " json.dump(video_and_fps, f)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [] 98 | } 99 | ], 100 | "metadata": { 101 | "kernelspec": { 102 | "display_name": "Django Shell-Plus", 103 | "language": "python", 104 | "name": "django_extensions" 105 | }, 106 | "language_info": { 107 | "codemirror_mode": { 108 | "name": "ipython", 109 | "version": 3 110 | }, 111 | "file_extension": ".py", 112 | "mimetype": "text/x-python", 113 | "name": "python", 114 | "nbconvert_exporter": "python", 115 | "pygments_lexer": "ipython3", 
116 | "version": "3.5.2" 117 | }, 118 | "toc": { 119 | "nav_menu": {}, 120 | "number_sections": true, 121 | "sideBar": true, 122 | "skip_h1_title": false, 123 | "toc_cell": false, 124 | "toc_position": {}, 125 | "toc_section_display": "block", 126 | "toc_window_display": false 127 | } 128 | }, 129 | "nbformat": 4, 130 | "nbformat_minor": 2 131 | } 132 | -------------------------------------------------------------------------------- /app/notebooks/face_attribute_visual_validation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2019-01-10T21:29:06.390877Z", 9 | "start_time": "2019-01-10T21:29:06.369328Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "from IPython.display import display\n", 15 | "from esper.widget import *" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "# Strided sample of haircolors" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "ExecuteTime": { 30 | "end_time": "2019-01-10T21:29:47.894560Z", 31 | "start_time": "2019-01-10T21:29:38.783980Z" 32 | }, 33 | "scrolled": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "for hc in HairColorName.objects.all():\n", 38 | " print(hc.name)\n", 39 | " widget = esper_widget(\n", 40 | " qs_to_resultf==,\n", 41 | " jupyter_keybindings=True, disable_playback=True, results_per_page=25, \n", 42 | " crop_bboxes=True\n", 43 | " )\n", 44 | " display(widget)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "# Strided sample of hairlengths" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "ExecuteTime": { 59 | "end_time": "2019-01-10T21:29:30.614884Z", 60 | "start_time": "2019-01-10T21:29:18.554504Z" 61 | }, 62 | "scrolled": false 
63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "for hl in HairLengthName.objects.all():\n", 67 | " print(hl.name)\n", 68 | " widget = esper_widget(\n", 69 | " qs_to_result(HairLength.objects.filter(length=hl), stride=10000),\n", 70 | " jupyter_keybindings=True, disable_playback=True, results_per_page=25,\n", 71 | " crop_bboxes=False\n", 72 | " )\n", 73 | " display(widget)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [] 82 | } 83 | ], 84 | "metadata": { 85 | "kernelspec": { 86 | "display_name": "Django Shell-Plus", 87 | "language": "python", 88 | "name": "django_extensions" 89 | }, 90 | "language_info": { 91 | "codemirror_mode": { 92 | "name": "ipython", 93 | "version": 3 94 | }, 95 | "file_extension": ".py", 96 | "mimetype": "text/x-python", 97 | "name": "python", 98 | "nbconvert_exporter": "python", 99 | "pygments_lexer": "ipython3", 100 | "version": "3.5.2" 101 | }, 102 | "toc": { 103 | "nav_menu": {}, 104 | "number_sections": true, 105 | "sideBar": true, 106 | "skip_h1_title": false, 107 | "toc_cell": false, 108 | "toc_position": {}, 109 | "toc_section_display": "block", 110 | "toc_window_display": false 111 | } 112 | }, 113 | "nbformat": 4, 114 | "nbformat_minor": 2 115 | } 116 | -------------------------------------------------------------------------------- /app/notebooks/get_canonical_shows.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2018-12-18T23:51:48.501217Z", 9 | "start_time": "2018-12-18T23:51:39.741092Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "from esper.prelude import *\n", 15 | "from esper.identity import *" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | "ExecuteTime": { 23 | "end_time": 
"2018-12-19T00:56:45.311540Z", 24 | "start_time": "2018-12-19T00:56:45.030276Z" 25 | }, 26 | "scrolled": false 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "from django.db.models import Count, Sum, F, FloatField\n", 31 | "show_and_video_counts = { \n", 32 | " (x['show__name'], x['channel__name'], x['count'], x['total_duration'] / 3600.)\n", 33 | " for x in Video.objects.values(\n", 34 | " 'show__name', 'channel__name'\n", 35 | " ).annotate(\n", 36 | " count=Count('show__name'),\n", 37 | " total_duration=Sum(F('num_frames') / F('fps'), output_field=FloatField())\n", 38 | " )\n", 39 | "}\n", 40 | "\n", 41 | "print('Show Name,Channel,Video Count,Hours,Recurring,Canonical Show Name'.format(k, v))\n", 42 | "for name, channel, count, duration in sorted(show_and_video_counts):\n", 43 | " print('\"{}\",\"{}\",{},{},{},\"{}\"'.format(name, channel, count, duration, count > 30, name))" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [] 52 | } 53 | ], 54 | "metadata": { 55 | "kernelspec": { 56 | "display_name": "Django Shell-Plus", 57 | "language": "python", 58 | "name": "django_extensions" 59 | }, 60 | "language_info": { 61 | "codemirror_mode": { 62 | "name": "ipython", 63 | "version": 3 64 | }, 65 | "file_extension": ".py", 66 | "mimetype": "text/x-python", 67 | "name": "python", 68 | "nbconvert_exporter": "python", 69 | "pygments_lexer": "ipython3", 70 | "version": "3.5.2" 71 | }, 72 | "toc": { 73 | "nav_menu": {}, 74 | "number_sections": true, 75 | "sideBar": false, 76 | "skip_h1_title": false, 77 | "toc_cell": false, 78 | "toc_position": {}, 79 | "toc_section_display": "block", 80 | "toc_window_display": false 81 | } 82 | }, 83 | "nbformat": 4, 84 | "nbformat_minor": 2 85 | } 86 | -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/Chris Harper-Mercer.txt: 
-------------------------------------------------------------------------------- 1 | {"0": [289439165, 315101219, 260135011, 308988138, 314370344, 305932611, 283017590, 260135307, 283016608, 314371520, 291186128, 282895833, 305931811, 260135504, 327684878, 314372696, 283016805, 327627367, 308988534, 305932411, 307244791, 260135702, 314372893, 260135899, 308988731, 310765415, 282896026, 314369956, 283016020, 310765215, 260142048, 267831330, 267831133, 266475600, 309042293, 285205841, 248687001, 270940645, 327555565, 317921366, 267871773, 324849199, 267871674, 267871872, 266819217, 291155296, 248689346, 338689817, 317922150, 266475404, 267831231, 299585502, 344686599, 270840741, 344609815, 291145765, 267849766, 260071466, 329150508, 305934394, 293430334, 310752331, 267887714, 270844023, 284599420, 329151511, 344609964, 318423581, 269622985, 267887813, 298280144, 305935571, 288912611, 267849966, 263811311, 306316738, 310752529, 289382674, 342406427, 306326844, 272915815, 329147756, 267824495, 310810017, 310753521, 270921128, 288912809, 267865514, 267824593, 299997093, 275584516, 310756882, 267886111, 292246067, 291182143, 292258380, 267888211, 288913007, 269622888, 293431922, 275252860, 267886211, 305934982, 329151111, 329150111, 310752928, 305934004, 310752129, 309042886, 329148141, 328749809, 291183347, 292246264, 329413382, 270841637, 269623081, 267848492, 329150309, 266855228, 310757864, 293430136, 289382277, 342143909, 328750006, 263811011, 263935941, 329413580, 310753127, 270841835, 306415659, 314347383, 288823489, 276023556, 279981281, 327635496, 279986424, 306316544, 330612997, 250886275, 288813361, 314413506, 327641439, 306415458, 329037249, 267162054, 276023752, 314347977, 276033997, 217173467, 288813559, 276033800, 279893566, 327635896, 307206840, 279981680, 306316933, 327638666, 324909714, 270219887, 306415260, 307206640, 306326645, 275584713, 279891578, 270160083, 291824400, 307244591, 306317127, 310809418, 276023949, 342501939, 276024146, 330612601, 
265332606, 307207039, 275584910, 310757276, 298281908, 276120501, 327636297, 265319430, 344655884, 314414094, 305935375, 310809617, 293429940, 308726853, 306208472, 250886230, 283026869, 299997293, 292246659, 266444934, 344621192, 270839945, 267849866, 292477069, 279986035, 305934200, 327626965, 308727249, 283026675, 267871478, 293430531, 272915716, 275252494, 267887913, 326597683, 270834998, 329041207, 275252677, 267850066, 267871576, 275254145, 292258182, 288823688, 267888012, 279891182, 315101612, 267850164, 267886013, 329150911, 305934786, 260140961, 275106539, 329037052, 267888112, 318878204, 270219784, 267887120, 314413308, 267850263, 319496732, 267886112, 327685668, 291825194, 329413187, 291824200, 298280540, 267848293, 267850362, 263811711, 318878397, 299996894, 267831036, 291823399, 260140860, 305935178, 291145565, 308987744, 263935841, 344609666, 270834605, 318877626, 292476868, 327685074, 344621044, 266570753, 279885837, 291186718, 306356264, 279980093, 291185734, 279995463, 266570853, 279984248, 279990358, 279886037, 266570953, 279979895, 279886635, 291178750, 310753324, 291185931, 279990553, 266571053, 291178551, 279983457, 319436153, 279886237, 266570170, 291178949, 279990749, 279979496, 291185140, 330615566, 327630353, 279983655, 279886436, 266570366, 291179149, 291186320, 279990945, 279979695, 291185336, 266570463, 279983852, 279885438, 342502137, 270943008, 266570560, 314371912, 291179348, 291186519, 279991144, 291185536, 279995265, 270220187, 327630750, 279885637, 266570657, 279984050, 305932211, 266570269, 291186915, 279886834, 263811111, 270215223, 267827259, 267867199, 309043281, 260071566, 329413777, 335738394, 267867299, 329150708, 327638273, 267867400, 330612002, 263811411, 329413975, 267867501, 314414486, 330615175, 305935767, 270844315, 263811511, 342406623, 279985642, 305933806, 283028985, 329414170, 263811611, 267867701, 267826960, 327639651, 267867801, 267826861, 313207510, 267867601, 309041899, 342284029, 260141852, 263811211, 
267827059, 267865415, 267827159, 267867098], "2": [288705695, 338689216, 289417415, 299584727, 289417613, 275926515, 259669979, 299584921, 259669879, 259670179, 289418212, 338689019, 306208671]} -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/Eric Garner.txt: -------------------------------------------------------------------------------- 1 | {"0": [216508454, 147071711, 215036859, 298486470], "1": [215971845, 215946506, 215936524, 215961880, 215946265, 215942942, 215934756, 215970087, 215946026, 215970292, 215969331, 215969858, 215961666, 215945800, 318887266, 215962638, 215934042, 215933788, 215970909, 215971423, 215962210, 215964263, 215945454, 215969652, 215945338, 215962431, 147072533, 215944320, 215935108, 215943980, 215943055, 215964053, 215970712, 215971737, 215943749, 215943623, 215934279, 215943394, 215970504, 215964363, 215972054, 215972573, 215969224, 215962856, 215972263, 75473904, 216137594, 216137004, 213625870, 336362002, 214576149, 216137243, 214577157, 336362795, 336363589, 214576613, 215445324, 214575697, 336364382, 216136763, 214575474, 215444647, 213131956, 214576833, 214576038, 213130969, 336360411, 213627871, 215651453, 214576370, 213628916, 215652137, 336361208, 216148989, 213629113], "2": [143039353, 74872605, 144399912, 143039020, 213113053, 144371511, 74938509, 144399192, 144370788, 74872934, 213112938, 144400268, 213112717, 74873230, 144399549, 213112825, 74872314], "3": [216238362, 345062702, 345064752, 217103670, 217039935, 311521376, 217103549, 216128918, 216129185, 223016797, 345064136, 250738104, 250738106, 250738108, 250738110, 217040068, 216238246, 345063420, 223016686, 345065468, 296458037, 275929954, 296457838, 275930748, 296456647, 296457442, 308692195, 296457045], "4": [285968605, 285975374, 76529506, 343714038, 218939196, 217498951, 216238476, 76532589, 76531065, 218139528, 219832033, 219786536]} 
-------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/Info.txt: -------------------------------------------------------------------------------- 1 | Accounted for people (more clusters per person, focus on controversial names): 2 | 3 | Shooters: 4 | 5 | Chris Harper-Mercer (Okay) 6 | Devin Patrick Kelley (Okay) 7 | Dylann Roof (Good) 8 | Esteban Santiago-Ruiz (Okay) 9 | Gavin Eugene-Long (Bad-not significant enough data) 10 | Micah Xavier Johnson (Good) 11 | Omar Mateen (Good) 12 | Robert Lewis Dear Jr (Good) 13 | Stephen Paddock (Good) 14 | Syed Rizwan Farook (Good) 15 | Tashfeen Malik (Good) 16 | Willie Corey Godbolt (Bad-not significant enough data) 17 | 18 | Victims: 19 | Eric Garner (Not in database) 20 | Laquan McDonald (Not in database) 21 | Michael Brown (Not in database) 22 | Philando Castille (Not in database) 23 | Tamir Rice (Not in database) 24 | Trayvon Martin (Redo) 25 | Walter Scott (IDK) 26 | 27 | Police Officers: 28 | 29 | Daniel Pantaleo (Eric Garner, Not in DB) 30 | Darren Wilson (Michaela Brown, Not in DB) 31 | Jason Van Dyke (Laquan McDonald, Not in DB) 32 | ??? 
(Tamir Rice, Unknown) 33 | Michael Slager (Walter Scott, Not in DB) 34 | Blane Salamoni (Alton Sterling, Not in DB) 35 | Howie Lake II (Alton Sterling, Not in DB) 36 | Jeronimo Yanez (Philando Castille, Not in DB) 37 | 38 | Celebrity: 39 | 40 | Robert Ailes (TBD) 41 | Miley Cyrus (TBD) 42 | Kanye West (Not in database) 43 | George Bush (Not in database) 44 | Michael Jackson (Not in database) 45 | Barack Obama (Not in database) 46 | -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/Laquan McDonald (Victim).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [] 9 | } 10 | ], 11 | "metadata": { 12 | "kernelspec": { 13 | "display_name": "Django Shell-Plus", 14 | "language": "python", 15 | "name": "django_extensions" 16 | }, 17 | "language_info": { 18 | "codemirror_mode": { 19 | "name": "ipython", 20 | "version": 3 21 | }, 22 | "file_extension": ".py", 23 | "mimetype": "text/x-python", 24 | "name": "python", 25 | "nbconvert_exporter": "python", 26 | "pygments_lexer": "ipython3", 27 | "version": "3.5.2" 28 | }, 29 | "toc": { 30 | "nav_menu": {}, 31 | "number_sections": true, 32 | "sideBar": true, 33 | "skip_h1_title": false, 34 | "toc_cell": false, 35 | "toc_position": {}, 36 | "toc_section_display": "block", 37 | "toc_window_display": false 38 | } 39 | }, 40 | "nbformat": 4, 41 | "nbformat_minor": 2 42 | } 43 | -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/Michael Brown (Victim).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Michael Brown" 8 | ] 9 | }, 10 | { 11 | 
"cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "ExecuteTime": { 15 | "end_time": "2019-08-07T17:22:52.450158Z", 16 | "start_time": "2019-08-07T17:22:45.358697Z" 17 | } 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "import ipywidgets as widgets\n", 22 | "from IPython.display import display\n", 23 | "import esper.identity_clusters\n", 24 | "from esper.identity_clusters import identity_clustering_workflow,_manual_recluster,visualization_workflow" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": { 31 | "ExecuteTime": { 32 | "end_time": "2019-08-07T17:22:52.496257Z", 33 | "start_time": "2019-08-07T17:22:52.453623Z" 34 | } 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "shootings = [\n", 39 | " ('Muhammad Youssef Abdulazeez', 'Chattanooga', 'Jul 16, 2015'),\n", 40 | " ('Chris Harper-Mercer', 'Umpqua Community College', 'Oct 1, 2015'),\n", 41 | " ('Robert Lewis Dear Jr', 'Colorado Springs - Planned Parenthood', 'Nov 27, 2015'),\n", 42 | " ('Syed Rizwan Farook', 'San Bernardino', 'Dec 2, 2015'), \n", 43 | " ('Tashfeen Malik', 'San Bernardino', 'Dec 2, 2015'),\n", 44 | " ('Dylann Roof', 'Charleston Shurch', 'Jun 17, 2015'),\n", 45 | " ('Omar Mateen', 'Orlando Nightclub', 'Jun 12, 2016'),\n", 46 | " ('Micah Xavier Johnson', 'Dallas Police', 'Jul 7-8, 2016'),\n", 47 | " ('Gavin Eugene Long', 'Baton Rouge Police', 'Jul 17, 2016'),\n", 48 | " ('Esteban Santiago-Ruiz', 'Ft. 
Lauderdale Airport', 'Jan 6, 2017'),\n", 49 | " ('Willie Corey Godbolt', 'Lincoln County', 'May 28, 2017'),\n", 50 | " ('Stephen Paddock', 'Las Vegas', 'Oct 1, 2017'),\n", 51 | " ('Devin Patrick Kelley', 'San Antonio Church', 'Nov 5, 2017'),\n", 52 | " ('Eric Garner', 'New York', 'Jul 17, 2014'),\n", 53 | " ('Michael Brown', 'Ferguson', 'Aug 09, 2014')\n", 54 | "]\n", 55 | "orm_set = { x.name for x in Identity.objects.filter(name__in=[s[0].lower() for s in shootings]) }\n", 56 | "for s in shootings:\n", 57 | " assert s[0].lower() in orm_set, '{} is not in the database'.format(s)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "ExecuteTime": { 65 | "start_time": "2019-08-07T17:22:14.990Z" 66 | } 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "identity_clustering_workflow('Michael Brown','Aug 09, 2014', True)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [] 79 | } 80 | ], 81 | "metadata": { 82 | "kernelspec": { 83 | "display_name": "Django Shell-Plus", 84 | "language": "python", 85 | "name": "django_extensions" 86 | }, 87 | "language_info": { 88 | "codemirror_mode": { 89 | "name": "ipython", 90 | "version": 3 91 | }, 92 | "file_extension": ".py", 93 | "mimetype": "text/x-python", 94 | "name": "python", 95 | "nbconvert_exporter": "python", 96 | "pygments_lexer": "ipython3", 97 | "version": "3.5.2" 98 | }, 99 | "toc": { 100 | "nav_menu": {}, 101 | "number_sections": true, 102 | "sideBar": true, 103 | "skip_h1_title": false, 104 | "toc_cell": false, 105 | "toc_position": {}, 106 | "toc_section_display": "block", 107 | "toc_window_display": false 108 | } 109 | }, 110 | "nbformat": 4, 111 | "nbformat_minor": 2 112 | } 113 | -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/Philando Castile (Victim).ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [] 9 | } 10 | ], 11 | "metadata": { 12 | "kernelspec": { 13 | "display_name": "Django Shell-Plus", 14 | "language": "python", 15 | "name": "django_extensions" 16 | }, 17 | "language_info": { 18 | "codemirror_mode": { 19 | "name": "ipython", 20 | "version": 3 21 | }, 22 | "file_extension": ".py", 23 | "mimetype": "text/x-python", 24 | "name": "python", 25 | "nbconvert_exporter": "python", 26 | "pygments_lexer": "ipython3", 27 | "version": "3.5.2" 28 | }, 29 | "toc": { 30 | "nav_menu": {}, 31 | "number_sections": true, 32 | "sideBar": true, 33 | "skip_h1_title": false, 34 | "toc_cell": false, 35 | "toc_position": {}, 36 | "toc_section_display": "block", 37 | "toc_window_display": false 38 | } 39 | }, 40 | "nbformat": 4, 41 | "nbformat_minor": 2 42 | } 43 | -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/Walter Scott (Victim).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [] 9 | } 10 | ], 11 | "metadata": { 12 | "kernelspec": { 13 | "display_name": "Django Shell-Plus", 14 | "language": "python", 15 | "name": "django_extensions" 16 | }, 17 | "language_info": { 18 | "codemirror_mode": { 19 | "name": "ipython", 20 | "version": 3 21 | }, 22 | "file_extension": ".py", 23 | "mimetype": "text/x-python", 24 | "name": "python", 25 | "nbconvert_exporter": "python", 26 | "pygments_lexer": "ipython3", 27 | "version": "3.5.2" 28 | }, 29 | "toc": { 30 | "nav_menu": {}, 31 | "number_sections": true, 32 | "sideBar": true, 33 | "skip_h1_title": false, 34 | "toc_cell": false, 35 | "toc_position": {}, 36 | 
"toc_section_display": "block", 37 | "toc_window_display": false 38 | } 39 | }, 40 | "nbformat": 4, 41 | "nbformat_minor": 2 42 | } 43 | -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/Willie Corey Godbolt.txt: -------------------------------------------------------------------------------- 1 | {"8": [48300033, 147300397, 257024107, 275603643, 340277298, 147665202, 303567166, 87942542, 148391520, 250380699, 303567365, 147796497, 256591299, 148378295, 147295022, 78029629, 147451364, 340653098, 300084063, 334263361, 148378792, 147657912, 147302633, 147789191, 340590468, 147666495, 147665859, 340276900, 147789515, 148379434, 147658542, 340277099, 147302303, 148391850, 147658860, 280660079, 92649587, 340584709, 78018826, 280660274, 147663942, 334309830, 147663309, 78027223, 147294695, 147667455, 76298810, 78019164, 334310030, 340560537, 78007571, 78019330, 147658226, 147450716, 78019503, 300084260, 78019672, 147295335, 147668090, 147451038, 215524142, 217312517, 334261588, 217312662, 78007746, 148807590, 78008417, 78007914, 147664575, 147300081, 78008081, 147301355, 147795854, 78008248, 275603443]} -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/123703025.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/123703025.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/251405426.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/251405426.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/251529606.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/251529606.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/251626619.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/251626619.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/261346406.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/261346406.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/262510755.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/Analysis of 
Shooters and Victims/roger-ailes-photos/262510755.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/268732111.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/268732111.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/314770412.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/314770412.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/7665011.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/7665011.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/test1.html: -------------------------------------------------------------------------------- 1 | Name: Syed Rizwan Farook -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/ove_cat.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/ove_cat.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/scratch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2019-06-27T20:29:03.643667Z", 9 | "start_time": "2019-06-27T20:29:03.620653Z" 10 | } 11 | }, 12 | "outputs": [ 13 | { 14 | "name": "stdout", 15 | "output_type": "stream", 16 | "text": [ 17 | "/app/data/black_face_ids.csv\n", 18 | "/app/data/hayes.png\n", 19 | "/app/data/aws-face-crops.tar.gz\n", 20 | "/app/data/dan_neurips\n", 21 | "/app/data/lemon.png\n", 22 | "/app/data/stories\n", 23 | "/app/data/blond_fox.png\n", 24 | "/app/data/video_10y.csv\n", 25 | "/app/data/google_images\n", 26 | "/app/data/azure-face-crops\n", 27 | "/app/data/race_pred\n", 28 | "/app/data/black.dan.250.random.json\n", 29 | "/app/data/embs\n", 30 | "/app/data/cnn_fox_msnbc.png\n", 31 | "/app/data/stories-data\n", 32 | "/app/data/video_ia_ids.json\n", 33 | "/app/data/baldwin.png\n", 34 | "/app/data/index10a\n", 35 | "/app/data/face_eval\n", 36 | "/app/data/widget-data\n", 37 | "/app/data/pg\n", 38 | "/app/data/gender_10y.csv\n", 39 | "/app/data/black.250.random.json\n", 40 | "/app/data/maddow.png\n", 41 | "/app/data/blond_cnn.png\n", 42 | "/app/data/face_identity_model_v2\n", 43 | "/app/data/index10\n", 44 | "/app/data/spark\n", 45 | "/app/data/interview_10y-all.pkl\n", 46 | "/app/data/.ipynb_checkpoints\n", 47 | "/app/data/black.1k.random.json\n", 48 | "/app/data/identity_10y.csv\n", 49 | "/app/data/aws-face-crops\n" 50 | ] 51 | } 52 | ], 53 | "source": [ 54 | "import os\n", 55 | "\n", 56 | "# The data directory in the container\n", 57 | "for x in os.listdir('/app/data'):\n", 58 | " print('/app/data/' + x)" 
59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [] 67 | } 68 | ], 69 | "metadata": { 70 | "kernelspec": { 71 | "display_name": "Django Shell-Plus", 72 | "language": "python", 73 | "name": "django_extensions" 74 | }, 75 | "language_info": { 76 | "codemirror_mode": { 77 | "name": "ipython", 78 | "version": 3 79 | }, 80 | "file_extension": ".py", 81 | "mimetype": "text/x-python", 82 | "name": "python", 83 | "nbconvert_exporter": "python", 84 | "pygments_lexer": "ipython3", 85 | "version": "3.5.2" 86 | }, 87 | "toc": { 88 | "nav_menu": {}, 89 | "number_sections": true, 90 | "sideBar": true, 91 | "skip_h1_title": false, 92 | "toc_cell": false, 93 | "toc_position": {}, 94 | "toc_section_display": "block", 95 | "toc_window_display": false 96 | } 97 | }, 98 | "nbformat": 4, 99 | "nbformat_minor": 2 100 | } 101 | -------------------------------------------------------------------------------- /app/notebooks/model_cross_validation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "from query.scripts.script_util import *\n", 13 | "from collections import defaultdict" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "def bbox_area(f):\n", 23 | " return (f.bbox_x2 - f.bbox_x1) * (f.bbox_y2 - f.bbox_y1) \n", 24 | "\n", 25 | "def bbox_midpoint(f):\n", 26 | " return np.array([(f.bbox_x1 + f.bbox_x2) / 2, (f.bbox_y1 + f.bbox_y2) / 2])\n", 27 | "\n", 28 | "def bbox_dist(f1, f2):\n", 29 | " return np.linalg.norm(bbox_midpoint(f1) - bbox_midpoint(f2))\n", 30 | "\n", 31 | "def bbox_to_dict(f):\n", 32 | " return {\n", 33 | " 'bbox_x1': f.bbox_x1,\n", 34 | " 'bbox_x2': 
f.bbox_x2,\n", 35 | " 'bbox_y1': f.bbox_y1,\n", 36 | " 'bbox_y2': f.bbox_y2,\n", 37 | " 'bbox_score': f.bbox_score,\n", 38 | " 'labeler': f.labeler.id\n", 39 | " }\n", 40 | "\n", 41 | "labeler_names = [l['labeler__name'] for l in FaceInstance.objects.values('labeler__name').distinct()]\n", 42 | "print(labeler_names)\n", 43 | "\n", 44 | "videos = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))\n", 45 | "for frame in Frame.objects.filter(Q(video__show='Situation Room With Wolf Blitzer') | Q(video__show='Special Report With Bret Baier')).select_related('video')[:10000:10]:\n", 46 | " faces = FaceInstance.objects.filter(frame=frame).select_related('labeler')\n", 47 | " for face in faces:\n", 48 | " videos[frame.video.id][frame.id][face.labeler.name].append(face)\n", 49 | "\n", 50 | "AREA_THRESHOLD = 0.02\n", 51 | "DIST_THRESHOLD = 0.10\n", 52 | "\n", 53 | "mistakes = defaultdict(lambda: defaultdict(tuple))\n", 54 | "for video, frames in videos.iteritems():\n", 55 | " for frame, labelers in frames.iteritems():\n", 56 | " for labeler, faces in labelers.iteritems():\n", 57 | " for face in faces:\n", 58 | " if bbox_area(face) < AREA_THRESHOLD:\n", 59 | " continue\n", 60 | " \n", 61 | " mistake = True\n", 62 | " for other_labeler in labeler_names:\n", 63 | " if labeler == other_labeler: continue\n", 64 | " other_faces = labelers[other_labeler] if other_labeler in labelers else []\n", 65 | " for other_face in other_faces:\n", 66 | " if bbox_dist(face, other_face) < DIST_THRESHOLD:\n", 67 | " mistake = False\n", 68 | " break\n", 69 | " \n", 70 | " if mistake and len(other_faces) > 0:\n", 71 | " mistakes[video][frame] = (faces, other_faces)\n", 72 | " break\n", 73 | " else:\n", 74 | " continue\n", 75 | " break\n", 76 | " \n", 77 | "result = []\n", 78 | "for video, frames in list(mistakes.iteritems())[:1000:10]:\n", 79 | " for frame, (faces, other_faces) in frames.iteritems():\n", 80 | " result.append({\n", 81 | " 'video': video,\n", 82 | " 'start_frame': frame,\n", 
83 | " 'bboxes': [bbox_to_dict(f) for f in faces + other_faces]\n", 84 | " })\n", 85 | " \n", 86 | "print(len(result))" 87 | ] 88 | } 89 | ], 90 | "metadata": { 91 | "kernelspec": { 92 | "display_name": "Django Shell-Plus", 93 | "language": "python", 94 | "name": "django_extensions" 95 | }, 96 | "language_info": { 97 | "codemirror_mode": { 98 | "name": "ipython", 99 | "version": 2 100 | }, 101 | "file_extension": ".py", 102 | "mimetype": "text/x-python", 103 | "name": "python", 104 | "nbconvert_exporter": "python", 105 | "pygments_lexer": "ipython2", 106 | "version": "2.7.12" 107 | } 108 | }, 109 | "nbformat": 4, 110 | "nbformat_minor": 2 111 | } 112 | -------------------------------------------------------------------------------- /app/notebooks/perf.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from query.datasets.prelude import *\n", 10 | "from timeit import default_timer as now\n", 11 | "\n", 12 | "q1 = FaceGender.objects.values('id')\n", 13 | "q2 = FaceGender.objects.values('id', 'face__person__frame__video__show') " 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "def profile(s, f):\n", 23 | " start = now()\n", 24 | " f()\n", 25 | " print('{}: {:.3f}'.format(s, now() - start))\n", 26 | " \n", 27 | "profile('nojoin', lambda: list(q1))\n", 28 | "profile('withjoin', lambda: list(q2))" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "q1.print_sql()\n", 38 | "print('')\n", 39 | "q1.explain()" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "q2.print_sql()\n", 49 | "print('')\n", 50 | "q2.explain()" 51 | ] 52 | 
} 53 | ], 54 | "metadata": { 55 | "kernelspec": { 56 | "display_name": "Django Shell-Plus", 57 | "language": "python", 58 | "name": "django_extensions" 59 | }, 60 | "language_info": { 61 | "codemirror_mode": { 62 | "name": "ipython", 63 | "version": 2 64 | }, 65 | "file_extension": ".py", 66 | "mimetype": "text/x-python", 67 | "name": "python", 68 | "nbconvert_exporter": "python", 69 | "pygments_lexer": "ipython2", 70 | "version": "2.7.14" 71 | }, 72 | "toc": { 73 | "nav_menu": {}, 74 | "number_sections": true, 75 | "sideBar": false, 76 | "skip_h1_title": false, 77 | "toc_cell": false, 78 | "toc_position": {}, 79 | "toc_section_display": "block", 80 | "toc_window_display": true 81 | } 82 | }, 83 | "nbformat": 4, 84 | "nbformat_minor": 2 85 | } 86 | -------------------------------------------------------------------------------- /app/notebooks/problang/transcript_heuristic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "toc": true 7 | }, 8 | "source": [ 9 | "

Table of Contents

\n", 10 | "
" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "ExecuteTime": { 18 | "end_time": "2018-12-02T21:13:47.249717Z", 19 | "start_time": "2018-12-02T21:13:47.227026Z" 20 | } 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "from esper.prelude import *\n", 25 | "from transcript_utils import *\n", 26 | "from operator import itemgetter" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "ExecuteTime": { 34 | "end_time": "2018-12-02T21:12:24.800479Z", 35 | "start_time": "2018-12-02T21:12:24.770393Z" 36 | } 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "all_videos = video_list()" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "ExecuteTime": { 48 | "end_time": "2018-12-02T21:13:49.466216Z", 49 | "start_time": "2018-12-02T21:13:47.896796Z" 50 | } 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "topic = 'immigration'\n", 55 | "lexicon = mutual_info(topic)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "ExecuteTime": { 63 | "end_time": "2018-12-02T21:13:53.091644Z", 64 | "start_time": "2018-12-02T21:13:50.493133Z" 65 | } 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "initial_segments = find_segments(all_videos, lexicon, threshold=20, window_size=SEGMENT_SIZE, stride=SEGMENT_STRIDE)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": { 76 | "ExecuteTime": { 77 | "end_time": "2018-12-02T21:13:53.119137Z", 78 | "start_time": "2018-12-02T21:13:53.094024Z" 79 | } 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "initial_segments_dict = [{'item_name': seg[0], 'segment': seg[2], 'score': seg[3]} for seg in initial_segments]\n", 84 | "initial_segments_dict.sort(key=itemgetter('score'), reverse=True)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "ExecuteTime": { 
92 | "end_time": "2018-12-02T21:13:53.146327Z", 93 | "start_time": "2018-12-02T21:13:53.120848Z" 94 | } 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "pcache.set('initial_segments', initial_segments_dict)" 99 | ] 100 | } 101 | ], 102 | "metadata": { 103 | "kernelspec": { 104 | "display_name": "Django Shell-Plus", 105 | "language": "python", 106 | "name": "django_extensions" 107 | }, 108 | "language_info": { 109 | "codemirror_mode": { 110 | "name": "ipython", 111 | "version": 3 112 | }, 113 | "file_extension": ".py", 114 | "mimetype": "text/x-python", 115 | "name": "python", 116 | "nbconvert_exporter": "python", 117 | "pygments_lexer": "ipython3", 118 | "version": "3.5.2" 119 | }, 120 | "toc": { 121 | "base_numbering": 1, 122 | "nav_menu": {}, 123 | "number_sections": true, 124 | "sideBar": false, 125 | "skip_h1_title": false, 126 | "title_cell": "Table of Contents", 127 | "title_sidebar": "Contents", 128 | "toc_cell": true, 129 | "toc_position": {}, 130 | "toc_section_display": "block", 131 | "toc_window_display": false 132 | } 133 | }, 134 | "nbformat": 4, 135 | "nbformat_minor": 2 136 | } 137 | -------------------------------------------------------------------------------- /app/notebooks/simple_templates/identity_still_images.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "toc": true 7 | }, 8 | "source": [ 9 | "

Table of Contents

\n", 10 | "
" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Instructions\n", 18 | "\n", 19 | "Run the cell below. You will be prompted to input a name." 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": { 26 | "ExecuteTime": { 27 | "end_time": "2018-08-22T18:36:53.445405Z", 28 | "start_time": "2018-08-22T18:31:39.674087Z" 29 | }, 30 | "scrolled": false 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "from esper.identity_clusters import identity_clustering_workflow\n", 35 | "identity_clustering_workflow(input('Input a name: ').strip(), init_clusters=20)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [] 44 | } 45 | ], 46 | "metadata": { 47 | "kernelspec": { 48 | "display_name": "Django Shell-Plus", 49 | "language": "python", 50 | "name": "django_extensions" 51 | }, 52 | "language_info": { 53 | "codemirror_mode": { 54 | "name": "ipython", 55 | "version": 3 56 | }, 57 | "file_extension": ".py", 58 | "mimetype": "text/x-python", 59 | "name": "python", 60 | "nbconvert_exporter": "python", 61 | "pygments_lexer": "ipython3", 62 | "version": "3.5.2" 63 | }, 64 | "toc": { 65 | "base_numbering": 1, 66 | "nav_menu": {}, 67 | "number_sections": true, 68 | "sideBar": false, 69 | "skip_h1_title": false, 70 | "title_cell": "Table of Contents", 71 | "title_sidebar": "Contents", 72 | "toc_cell": true, 73 | "toc_position": {}, 74 | "toc_section_display": "block", 75 | "toc_window_display": false 76 | } 77 | }, 78 | "nbformat": 4, 79 | "nbformat_minor": 2 80 | } 81 | -------------------------------------------------------------------------------- /app/notebooks/spark.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pyspark.sql 
import SparkSession" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "spark = SparkSession.builder.master(\"spark://spark:7077\").getOrCreate()" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "df = spark.createDataFrame([('Alice', 1)])\n", 28 | "df.show()" 29 | ] 30 | } 31 | ], 32 | "metadata": { 33 | "kernelspec": { 34 | "display_name": "Django Shell-Plus", 35 | "language": "python", 36 | "name": "django_extensions" 37 | }, 38 | "language_info": { 39 | "codemirror_mode": { 40 | "name": "ipython", 41 | "version": 2 42 | }, 43 | "file_extension": ".py", 44 | "mimetype": "text/x-python", 45 | "name": "python", 46 | "nbconvert_exporter": "python", 47 | "pygments_lexer": "ipython2", 48 | "version": "2.7.14" 49 | }, 50 | "toc": { 51 | "nav_menu": {}, 52 | "number_sections": true, 53 | "sideBar": false, 54 | "skip_h1_title": false, 55 | "toc_cell": false, 56 | "toc_position": { 57 | "height": "47px", 58 | "left": "1491px", 59 | "right": "20px", 60 | "top": "110.5px", 61 | "width": "185px" 62 | }, 63 | "toc_section_display": "block", 64 | "toc_window_display": true 65 | } 66 | }, 67 | "nbformat": 4, 68 | "nbformat_minor": 2 69 | } 70 | -------------------------------------------------------------------------------- /app/notebooks/test_captions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "toc": true 7 | }, 8 | "source": [ 9 | "

Table of Contents

\n", 10 | "
" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "ExecuteTime": { 18 | "end_time": "2018-12-21T00:03:32.011406Z", 19 | "start_time": "2018-12-21T00:00:59.470534Z" 20 | } 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "from esper.captions import topic_search, phrase_search, get_lowercase_segments" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "ExecuteTime": { 32 | "end_time": "2018-12-21T00:08:55.818835Z", 33 | "start_time": "2018-12-21T00:08:03.211906Z" 34 | } 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "result = topic_search(['TACO BELL'])\n", 39 | "count = 0\n", 40 | "for d in result:\n", 41 | " count += 1\n", 42 | "print('Found in {} videos'.format(count))\n", 43 | "\n", 44 | "video_ids = [v.id for v in Video.objects.all()[:1000]]\n", 45 | "result = topic_search(['TACO BELL'], video_ids=video_ids)\n", 46 | "count = 0\n", 47 | "for d in result:\n", 48 | " count += 1\n", 49 | "print('Found in {} videos (of {} ids)'.format(count, len(video_ids)))" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "ExecuteTime": { 57 | "end_time": "2018-12-21T00:08:55.903266Z", 58 | "start_time": "2018-12-21T00:08:55.822651Z" 59 | } 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "result = phrase_search('TACO BELL')\n", 64 | "count = 0\n", 65 | "for d in result:\n", 66 | " count += 1\n", 67 | "print('Found in {} videos'.format(count))\n", 68 | "\n", 69 | "video_ids = [v.id for v in Video.objects.all()[:1000]]\n", 70 | "result = phrase_search('TACO BELL', video_ids=video_ids)\n", 71 | "count = 0\n", 72 | "for d in result:\n", 73 | " count += 1\n", 74 | "print('Found in {} videos (of {} ids)'.format(count, len(video_ids)))" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "ExecuteTime": { 82 | "end_time": "2018-12-17T19:27:55.291000Z", 83 | "start_time": 
"2018-12-17T19:27:13.735758Z" 84 | }, 85 | "scrolled": true 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "count = 0\n", 90 | "for document_id, lowercase_ranges in get_lowercase_segments():\n", 91 | " count += 1\n", 92 | " if len(lowercase_ranges) > 0:\n", 93 | " print('{} has {} lowercase ranges'.format(\n", 94 | " document_id, len(lowercase_ranges)))\n", 95 | "print('checked {} documents'.format(count))" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [] 104 | } 105 | ], 106 | "metadata": { 107 | "kernelspec": { 108 | "display_name": "Django Shell-Plus", 109 | "language": "python", 110 | "name": "django_extensions" 111 | }, 112 | "language_info": { 113 | "codemirror_mode": { 114 | "name": "ipython", 115 | "version": 3 116 | }, 117 | "file_extension": ".py", 118 | "mimetype": "text/x-python", 119 | "name": "python", 120 | "nbconvert_exporter": "python", 121 | "pygments_lexer": "ipython3", 122 | "version": "3.5.2" 123 | }, 124 | "toc": { 125 | "nav_menu": {}, 126 | "number_sections": true, 127 | "sideBar": false, 128 | "skip_h1_title": false, 129 | "toc_cell": true, 130 | "toc_position": {}, 131 | "toc_section_display": "block", 132 | "toc_window_display": false 133 | } 134 | }, 135 | "nbformat": 4, 136 | "nbformat_minor": 2 137 | } 138 | -------------------------------------------------------------------------------- /app/notebooks/validation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Click **Kernel > Restart & Run All**." 
8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from query.datasets.prelude import *\n", 17 | "from query.datasets.tvnews.validation import *" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "# start: 3164, end: 4281 in 15 min\n", 27 | "\n", 28 | "\n", 29 | "# TODO: get distribution of shot length for M vs. F\n", 30 | "\n", 31 | "base_face = face_validation('All faces', lambda x: x)\n", 32 | "big_face = face_validation(\n", 33 | " 'Faces height > 0.2', lambda qs: qs.annotate(height=F('bbox_y2') - F('bbox_y1')).filter(height__gte=0.2))\n", 34 | "fg_face = face_validation(\n", 35 | " 'Faces in foreground', lambda qs: qs.filter(background=False))" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "scrolled": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "all_gender_accuracy, all_gender_cmat = gender_validation('All gender', base_face)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "fg_gender_accuracy, fg_gender_cmat = gender_validation('Gender w/ faces in foreground', fg_face)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "big_gender_accuracy, big_gender_cmat = gender_validation('Gender w/ face height > 0.2', big_face)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "screentime_validation('All faces', lambda x: x, all_gender_cmat)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "screentime_validation(\n", 83 | " 'Face height > 0.2',\n", 84 | " lambda qs: 
qs.annotate(height=F('face__bbox_y2') - F('face__bbox_y1')).filter(height__gte=0.2),\n", 85 | " big_gender_cmat)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "speaking_validation()" 95 | ] 96 | } 97 | ], 98 | "metadata": { 99 | "kernelspec": { 100 | "display_name": "Django Shell-Plus", 101 | "language": "python", 102 | "name": "django_extensions" 103 | }, 104 | "language_info": { 105 | "codemirror_mode": { 106 | "name": "ipython", 107 | "version": 2 108 | }, 109 | "file_extension": ".py", 110 | "mimetype": "text/x-python", 111 | "name": "python", 112 | "nbconvert_exporter": "python", 113 | "pygments_lexer": "ipython2", 114 | "version": "2.7.14" 115 | }, 116 | "toc": { 117 | "nav_menu": {}, 118 | "number_sections": true, 119 | "sideBar": false, 120 | "skip_h1_title": false, 121 | "toc_cell": false, 122 | "toc_position": {}, 123 | "toc_section_display": "none", 124 | "toc_window_display": false 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 2 129 | } 130 | -------------------------------------------------------------------------------- /app/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "esper", 3 | "description": "Video search engine and interface", 4 | "version": "1.0.0", 5 | "repository": { 6 | "type": "git", 7 | "url": "git+https://github.com/scanner-research/esper.git" 8 | }, 9 | "author": "Will Crichton", 10 | "license": "Apache-2.0", 11 | "bugs": { 12 | "url": "https://github.com/scanner-research/esper/issues" 13 | }, 14 | "homepage": "https://github.com/scanner-research/esper#readme", 15 | "scripts": { 16 | "build": "webpack", 17 | "watch": "webpack -w", 18 | "prepublish": "webpack -p" 19 | }, 20 | "devDependencies": { 21 | "axios": "^0.16.1", 22 | "babel-core": "^6.24.1", 23 | "babel-loader": "^7.0.0", 24 | "babel-plugin-transform-decorators-legacy": "^1.3.4", 25 | 
"babel-preset-env": "^1.6.1", 26 | "babel-preset-react": "^6.24.1", 27 | "babel-preset-stage-0": "^6.24.1", 28 | "bootstrap": "^3.3.7", 29 | "create-react-class": "^15.6.3", 30 | "css-loader": "^0.28.1", 31 | "extract-text-webpack-plugin": "^2.1.0", 32 | "file-loader": "^0.11.1", 33 | "jquery": "^3.3.1", 34 | "left-pad": "^1.1.3", 35 | "lodash": "^4.17.4", 36 | "mobx": "^4.8.0", 37 | "mobx-react": "^5.4.3", 38 | "node-sass": "^4.5.2", 39 | "open-iconic": "^1.1.1", 40 | "react": "^16.3.2", 41 | "react-ace": "^5.4.0", 42 | "react-bootstrap": "0.31.3", 43 | "react-dom": "^16.3.2", 44 | "sass-loader": "^6.0.5", 45 | "source-map-loader": "^0.2.4", 46 | "url-loader": "^0.5.8", 47 | "webpack": "^2.5.1", 48 | "webpack-bundle-tracker": "^0.2.0" 49 | }, 50 | "config": { 51 | "unsafe-perm": true 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /app/query/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/query/__init__.py -------------------------------------------------------------------------------- /app/query/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | from .models import * 4 | -------------------------------------------------------------------------------- /app/query/apps.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from django.apps import AppConfig 4 | 5 | 6 | class QueryConfig(AppConfig): 7 | name = 'query' 8 | -------------------------------------------------------------------------------- /app/query/management/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/query/management/__init__.py -------------------------------------------------------------------------------- /app/query/management/commands/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/query/management/commands/__init__.py -------------------------------------------------------------------------------- /app/query/management/commands/run.py: -------------------------------------------------------------------------------- 1 | from django.core.management.base import BaseCommand, CommandError 2 | import imp 3 | import sys 4 | 5 | class Command(BaseCommand): 6 | help = 'Run a script' 7 | 8 | def add_arguments(self, parser): 9 | parser.add_argument('path') 10 | parser.add_argument('args', nargs='*') 11 | 12 | def handle(self, *args, **options): 13 | sys.argv = tuple([options['path']] + list(args)) 14 | imp.load_source('__main__', options['path']) 15 | -------------------------------------------------------------------------------- /app/query/migrations/0002_auto_20180529_1049.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-05-29 10:49 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0001_initial'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='faceidentity', 17 | name='probability', 18 | field=models.FloatField(default=1.0), 19 | ), 20 | migrations.AddField( 21 | model_name='labeler', 22 | name='created', 23 | field=models.DateTimeField(auto_now_add=True, null=True), 24 | ), 25 | migrations.AddField( 26 | model_name='labeler', 27 | name='data_path', 
28 | field=models.CharField(blank=True, max_length=256, null=True), 29 | ), 30 | ] 31 | -------------------------------------------------------------------------------- /app/query/migrations/0003_show_hosts.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-06-04 19:50 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0002_auto_20180529_1049'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='show', 17 | name='hosts', 18 | field=models.ManyToManyField(blank=True, to='query.Thing'), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /app/query/migrations/0004_auto_20180612_1255.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-06-12 12:55 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0003_show_hosts'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='CanonicalShow', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ('name', models.CharField(max_length=256)), 21 | ('is_prime_time', models.BooleanField(default=False)), 22 | ('is_weekend', models.BooleanField(default=False)), 23 | ('is_recurring', models.BooleanField(default=False)), 24 | ], 25 | ), 26 | migrations.AddField( 27 | model_name='show', 28 | name='canonical_show', 29 | field=models.ForeignKey(blank=True, default=None, null=True, on_delete=django.db.models.deletion.CASCADE, to='query.CanonicalShow'), 30 | ), 31 | ] 32 | 
-------------------------------------------------------------------------------- /app/query/migrations/0005_auto_20180612_1316.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-06-12 13:16 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0004_auto_20180612_1255'), 12 | ] 13 | 14 | operations = [ 15 | migrations.RemoveField( 16 | model_name='canonicalshow', 17 | name='is_prime_time', 18 | ), 19 | migrations.RemoveField( 20 | model_name='canonicalshow', 21 | name='is_weekend', 22 | ), 23 | ] 24 | -------------------------------------------------------------------------------- /app/query/migrations/0006_auto_20180612_1317.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-06-12 13:17 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0005_auto_20180612_1316'), 13 | ] 14 | 15 | operations = [ 16 | migrations.AlterField( 17 | model_name='show', 18 | name='canonical_show', 19 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.CanonicalShow'), 20 | ), 21 | ] 22 | -------------------------------------------------------------------------------- /app/query/migrations/0007_canonicalshow_hosts.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-06-15 00:40 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', 
'0006_auto_20180612_1317'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='canonicalshow', 17 | name='hosts', 18 | field=models.ManyToManyField(blank=True, to='query.Thing'), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /app/query/migrations/0008_auto_20180625_1033.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-06-25 10:33 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0007_canonicalshow_hosts'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='face', 17 | name='probability', 18 | field=models.FloatField(default=1.0), 19 | ), 20 | migrations.AddField( 21 | model_name='facegender', 22 | name='probability', 23 | field=models.FloatField(default=1.0), 24 | ), 25 | ] 26 | -------------------------------------------------------------------------------- /app/query/migrations/0009_object.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-06-25 11:47 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0008_auto_20180625_1033'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='Object', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ('bbox_x1', models.FloatField()), 21 | ('bbox_x2', models.FloatField()), 22 | ('bbox_y1', models.FloatField()), 23 | ('bbox_y2', models.FloatField()), 24 | ('bbox_score', models.FloatField()), 25 | ('cls', models.IntegerField()), 26 | 
('probability', models.FloatField()), 27 | ('frame', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Frame')), 28 | ], 29 | options={ 30 | 'abstract': False, 31 | }, 32 | ), 33 | ] 34 | -------------------------------------------------------------------------------- /app/query/migrations/0010_auto_20180625_1152.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-06-25 11:52 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0009_object'), 12 | ] 13 | 14 | operations = [ 15 | migrations.RenameField( 16 | model_name='object', 17 | old_name='cls', 18 | new_name='label', 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /app/query/migrations/0011_auto_20180625_1157.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-06-25 11:57 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0010_auto_20180625_1152'), 12 | ] 13 | 14 | operations = [ 15 | migrations.RemoveField( 16 | model_name='face', 17 | name='bbox_score', 18 | ), 19 | migrations.RemoveField( 20 | model_name='object', 21 | name='bbox_score', 22 | ), 23 | ] 24 | -------------------------------------------------------------------------------- /app/query/migrations/0012_auto_20180712_1252.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-07-12 12:52 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 
| dependencies = [ 11 | ('query', '0011_auto_20180625_1157'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterField( 16 | model_name='faceidentity', 17 | name='probability', 18 | field=models.FloatField(db_index=True, default=1.0), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /app/query/migrations/0013_auto_20180825_2234.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-08-25 22:34 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0012_auto_20180712_1252'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='video', 17 | name='threeyears_dataset', 18 | field=models.BooleanField(default=False), 19 | ), 20 | migrations.AlterField( 21 | model_name='faceidentity', 22 | name='probability', 23 | field=models.FloatField(default=1.0), 24 | ), 25 | ] 26 | -------------------------------------------------------------------------------- /app/query/migrations/0014_auto_20180926_1514.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-09-26 15:14 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0013_auto_20180825_2234'), 13 | ] 14 | 15 | operations = [ 16 | migrations.RemoveField( 17 | model_name='speaker', 18 | name='gender', 19 | ), 20 | migrations.RemoveField( 21 | model_name='speaker', 22 | name='identity', 23 | ), 24 | migrations.RemoveField( 25 | model_name='speaker', 26 | name='labeler', 27 | ), 28 | migrations.RemoveField( 29 | model_name='speaker', 30 | name='video', 31 | ), 32 | 
migrations.RemoveField( 33 | model_name='identity', 34 | name='thing', 35 | ), 36 | migrations.AddField( 37 | model_name='faceidentity', 38 | name='identity2', 39 | field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, to='query.Identity'), 40 | ), 41 | migrations.AlterField( 42 | model_name='identity', 43 | name='name', 44 | field=models.CharField(default='', max_length=256), 45 | preserve_default=False, 46 | ), 47 | migrations.DeleteModel( 48 | name='Speaker', 49 | ), 50 | ] 51 | -------------------------------------------------------------------------------- /app/query/migrations/0015_auto_20180926_1902.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-09-26 19:02 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0014_auto_20180926_1514'), 13 | ] 14 | 15 | operations = [ 16 | migrations.RemoveField( 17 | model_name='faceidentity', 18 | name='identity', 19 | ), 20 | migrations.AlterField( 21 | model_name='faceidentity', 22 | name='identity2', 23 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Identity'), 24 | ), 25 | ] 26 | -------------------------------------------------------------------------------- /app/query/migrations/0016_auto_20180926_2037.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-09-26 20:37 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0015_auto_20180926_1902'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='canonicalshow', 17 | name='hosts2', 18 | 
field=models.ManyToManyField(blank=True, to='query.Identity'), 19 | ), 20 | migrations.AddField( 21 | model_name='show', 22 | name='hosts2', 23 | field=models.ManyToManyField(blank=True, to='query.Identity'), 24 | ), 25 | ] 26 | -------------------------------------------------------------------------------- /app/query/migrations/0017_auto_20180926_2045.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-09-26 20:45 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0016_auto_20180926_2037'), 12 | ] 13 | 14 | operations = [ 15 | migrations.RemoveField( 16 | model_name='canonicalshow', 17 | name='hosts', 18 | ), 19 | migrations.RemoveField( 20 | model_name='show', 21 | name='hosts', 22 | ), 23 | ] 24 | -------------------------------------------------------------------------------- /app/query/migrations/0018_auto_20180926_2045.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-09-26 20:45 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0017_auto_20180926_2045'), 12 | ] 13 | 14 | operations = [ 15 | migrations.RenameField( 16 | model_name='canonicalshow', 17 | old_name='hosts2', 18 | new_name='hosts', 19 | ), 20 | migrations.RenameField( 21 | model_name='faceidentity', 22 | old_name='identity2', 23 | new_name='identity', 24 | ), 25 | migrations.RenameField( 26 | model_name='show', 27 | old_name='hosts2', 28 | new_name='hosts', 29 | ), 30 | ] 31 | -------------------------------------------------------------------------------- /app/query/migrations/0019_topic.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-09-26 20:47 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0018_auto_20180926_2045'), 12 | ] 13 | 14 | operations = [ 15 | migrations.CreateModel( 16 | name='Topic', 17 | fields=[ 18 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 19 | ('name', models.CharField(max_length=256)), 20 | ], 21 | ), 22 | ] 23 | -------------------------------------------------------------------------------- /app/query/migrations/0020_segment_topics.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-09-26 20:49 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0019_topic'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='segment', 17 | name='topics', 18 | field=models.ManyToManyField(to='query.Topic'), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /app/query/migrations/0021_auto_20180926_2104.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-09-26 21:04 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0020_segment_topics'), 12 | ] 13 | 14 | operations = [ 15 | migrations.RemoveField( 16 | model_name='thing', 17 | name='type', 18 | ), 19 | migrations.RemoveField( 20 | model_name='segment', 21 | name='things', 22 | 
), 23 | migrations.DeleteModel( 24 | name='Thing', 25 | ), 26 | migrations.DeleteModel( 27 | name='ThingType', 28 | ), 29 | ] 30 | -------------------------------------------------------------------------------- /app/query/migrations/0022_auto_20181026_1002.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-10-26 10:02 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0021_auto_20180926_2104'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='TimeSegment', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ('start', models.IntegerField()), 21 | ('end', models.IntegerField()), 22 | ], 23 | ), 24 | migrations.CreateModel( 25 | name='LabeledCommercial', 26 | fields=[ 27 | ('timesegment_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='query.TimeSegment')), 28 | ], 29 | bases=('query.timesegment',), 30 | ), 31 | migrations.CreateModel( 32 | name='LabeledInterview', 33 | fields=[ 34 | ('timesegment_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='query.TimeSegment')), 35 | ('original', models.BooleanField(default=True)), 36 | ('scattered_clips', models.BooleanField(default=False)), 37 | ('interviewer1', models.CharField(blank=True, default=None, max_length=256, null=True)), 38 | ('interviewer2', models.CharField(blank=True, default=None, max_length=256, null=True)), 39 | ('guest1', models.CharField(blank=True, default=None, max_length=256, null=True)), 40 | ('guest2', models.CharField(blank=True, default=None, 
max_length=256, null=True)), 41 | ], 42 | bases=('query.timesegment',), 43 | ), 44 | migrations.CreateModel( 45 | name='LabeledPanel', 46 | fields=[ 47 | ('timesegment_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='query.TimeSegment')), 48 | ('num_panelists', models.IntegerField()), 49 | ], 50 | bases=('query.timesegment',), 51 | ), 52 | migrations.AddField( 53 | model_name='timesegment', 54 | name='video', 55 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Video'), 56 | ), 57 | ] 58 | -------------------------------------------------------------------------------- /app/query/migrations/0022_frame_shot_boundary.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-10-28 18:46 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0021_auto_20180926_2104'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='frame', 17 | name='shot_boundary', 18 | field=models.BooleanField(default=True), 19 | preserve_default=False, 20 | ), 21 | ] 22 | -------------------------------------------------------------------------------- /app/query/migrations/0023_auto_20181026_1010.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-10-26 10:10 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0022_auto_20181026_1002'), 12 | ] 13 | 14 | operations = [ 15 | migrations.RemoveField( 16 | model_name='labeledcommercial', 17 | name='timesegment_ptr', 18 | ), 19 | migrations.RemoveField( 20 | 
model_name='labeledinterview', 21 | name='timesegment_ptr', 22 | ), 23 | migrations.RemoveField( 24 | model_name='labeledpanel', 25 | name='timesegment_ptr', 26 | ), 27 | migrations.RemoveField( 28 | model_name='timesegment', 29 | name='video', 30 | ), 31 | migrations.DeleteModel( 32 | name='LabeledCommercial', 33 | ), 34 | migrations.DeleteModel( 35 | name='LabeledInterview', 36 | ), 37 | migrations.DeleteModel( 38 | name='LabeledPanel', 39 | ), 40 | migrations.DeleteModel( 41 | name='TimeSegment', 42 | ), 43 | ] 44 | -------------------------------------------------------------------------------- /app/query/migrations/0023_auto_20181101_2104.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-11-01 21:04 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0022_frame_shot_boundary'), 13 | ] 14 | 15 | operations = [ 16 | migrations.AddField( 17 | model_name='face', 18 | name='frame', 19 | field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, to='query.Frame'), 20 | ), 21 | migrations.AddField( 22 | model_name='pose', 23 | name='frame', 24 | field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, to='query.Frame'), 25 | ), 26 | migrations.AlterUniqueTogether( 27 | name='face', 28 | unique_together=set([]), 29 | ), 30 | migrations.AlterUniqueTogether( 31 | name='pose', 32 | unique_together=set([]), 33 | ), 34 | ] 35 | -------------------------------------------------------------------------------- /app/query/migrations/0024_auto_20181103_2019.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-11-03 20:19 3 | from __future__ import unicode_literals 4 | 
5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0023_auto_20181101_2104'), 13 | ] 14 | 15 | operations = [ 16 | migrations.RemoveField( 17 | model_name='person', 18 | name='frame', 19 | ), 20 | migrations.RemoveField( 21 | model_name='pose', 22 | name='person', 23 | ), 24 | migrations.AlterField( 25 | model_name='face', 26 | name='frame', 27 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Frame'), 28 | ), 29 | migrations.AlterField( 30 | model_name='pose', 31 | name='frame', 32 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Frame'), 33 | ), 34 | migrations.RemoveField( 35 | model_name='face', 36 | name='person', 37 | ), 38 | migrations.AlterUniqueTogether( 39 | name='face', 40 | unique_together=set([('labeler', 'frame', 'bbox_x1', 'bbox_x2', 'bbox_y1', 'bbox_y2')]), 41 | ), 42 | migrations.DeleteModel( 43 | name='Person', 44 | ), 45 | ] 46 | -------------------------------------------------------------------------------- /app/query/migrations/0024_labeledcommercial_labeledinterview_labeledpanel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-10-26 10:12 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0023_auto_20181026_1010'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='LabeledCommercial', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ('start', models.FloatField()), 21 | ('end', models.FloatField()), 22 | ('video', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Video')), 
23 | ], 24 | ), 25 | migrations.CreateModel( 26 | name='LabeledInterview', 27 | fields=[ 28 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 29 | ('start', models.FloatField()), 30 | ('end', models.FloatField()), 31 | ('interviewer1', models.CharField(blank=True, default=None, max_length=256, null=True)), 32 | ('interviewer2', models.CharField(blank=True, default=None, max_length=256, null=True)), 33 | ('guest1', models.CharField(blank=True, default=None, max_length=256, null=True)), 34 | ('guest2', models.CharField(blank=True, default=None, max_length=256, null=True)), 35 | ('original', models.BooleanField(default=True)), 36 | ('scattered_clips', models.BooleanField(default=False)), 37 | ('video', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Video')), 38 | ], 39 | ), 40 | migrations.CreateModel( 41 | name='LabeledPanel', 42 | fields=[ 43 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 44 | ('start', models.FloatField()), 45 | ('end', models.FloatField()), 46 | ('num_panelists', models.IntegerField()), 47 | ('video', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Video')), 48 | ], 49 | ), 50 | ] 51 | -------------------------------------------------------------------------------- /app/query/migrations/0025_merge_20181227_1812.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-12-27 18:12 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0024_labeledcommercial_labeledinterview_labeledpanel'), 12 | ('query', '0024_auto_20181103_2019'), 13 | ] 14 | 15 | operations = [ 16 | ] 17 | -------------------------------------------------------------------------------- 
/app/query/migrations/0026_auto_20181227_1812.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-12-27 18:12 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0025_merge_20181227_1812'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='HairColor', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ], 21 | ), 22 | migrations.CreateModel( 23 | name='HairColorName', 24 | fields=[ 25 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 26 | ('name', models.CharField(max_length=256)), 27 | ], 28 | ), 29 | migrations.AddField( 30 | model_name='haircolor', 31 | name='color', 32 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.HairColorName'), 33 | ), 34 | migrations.AddField( 35 | model_name='haircolor', 36 | name='face', 37 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Face'), 38 | ), 39 | migrations.AddField( 40 | model_name='haircolor', 41 | name='labeler', 42 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Labeler'), 43 | ), 44 | migrations.AlterUniqueTogether( 45 | name='haircolor', 46 | unique_together=set([('labeler', 'face')]), 47 | ), 48 | ] 49 | -------------------------------------------------------------------------------- /app/query/migrations/0027_auto_20181227_1829.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-12-27 18:29 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class 
Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0026_auto_20181227_1812'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterField( 16 | model_name='haircolorname', 17 | name='name', 18 | field=models.CharField(max_length=256, unique=True), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /app/query/migrations/0028_auto_20181228_1555.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-12-28 15:55 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0027_auto_20181227_1829'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterField( 16 | model_name='canonicalshow', 17 | name='name', 18 | field=models.CharField(max_length=256, unique=True), 19 | ), 20 | migrations.AlterField( 21 | model_name='channel', 22 | name='name', 23 | field=models.CharField(max_length=256, unique=True), 24 | ), 25 | migrations.AlterField( 26 | model_name='identity', 27 | name='name', 28 | field=models.CharField(max_length=256, unique=True), 29 | ), 30 | migrations.AlterField( 31 | model_name='show', 32 | name='name', 33 | field=models.CharField(max_length=256, unique=True), 34 | ), 35 | migrations.AlterField( 36 | model_name='topic', 37 | name='name', 38 | field=models.CharField(max_length=256, unique=True), 39 | ), 40 | ] 41 | -------------------------------------------------------------------------------- /app/query/migrations/0028_auto_20190101_2032.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2019-01-01 20:32 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class 
Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0027_auto_20181227_1829'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='Clothing', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ], 21 | ), 22 | migrations.CreateModel( 23 | name='ClothingName', 24 | fields=[ 25 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 26 | ('name', models.CharField(max_length=256, unique=True)), 27 | ], 28 | ), 29 | migrations.AddField( 30 | model_name='clothing', 31 | name='clothing', 32 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.ClothingName'), 33 | ), 34 | migrations.AddField( 35 | model_name='clothing', 36 | name='face', 37 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Face'), 38 | ), 39 | migrations.AddField( 40 | model_name='clothing', 41 | name='labeler', 42 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Labeler'), 43 | ), 44 | migrations.AlterUniqueTogether( 45 | name='clothing', 46 | unique_together=set([('labeler', 'face')]), 47 | ), 48 | ] 49 | -------------------------------------------------------------------------------- /app/query/migrations/0029_video_duplicate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2019-01-03 19:56 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0028_auto_20181228_1555'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='video', 17 | name='duplicate', 18 | field=models.BooleanField(default=False), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- 
/app/query/migrations/0030_video_corrupted.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2019-01-03 20:14 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0029_video_duplicate'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='video', 17 | name='corrupted', 18 | field=models.BooleanField(default=False), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /app/query/migrations/0031_merge_20190105_1016.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2019-01-05 10:16 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0028_auto_20190101_2032'), 12 | ('query', '0030_video_corrupted'), 13 | ] 14 | 15 | operations = [ 16 | ] 17 | -------------------------------------------------------------------------------- /app/query/migrations/0032_auto_20190106_1451.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2019-01-06 14:51 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0031_merge_20190105_1016'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='HairLength', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ('face', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, 
to='query.Face')), 21 | ('labeler', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Labeler')), 22 | ], 23 | ), 24 | migrations.CreateModel( 25 | name='HairLengthName', 26 | fields=[ 27 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 28 | ('name', models.CharField(max_length=256, unique=True)), 29 | ], 30 | ), 31 | migrations.AddField( 32 | model_name='hairlength', 33 | name='length', 34 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.HairLengthName'), 35 | ), 36 | migrations.AlterUniqueTogether( 37 | name='hairlength', 38 | unique_together=set([('labeler', 'face')]), 39 | ), 40 | ] 41 | -------------------------------------------------------------------------------- /app/query/migrations/0033_auto_20190311_1730.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2019-03-11 17:30 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0032_auto_20190106_1451'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='FaceTag', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ('score', models.FloatField(default=1.0)), 21 | ('face', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Face')), 22 | ('labeler', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Labeler')), 23 | ('tag', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Tag')), 24 | ], 25 | ), 26 | migrations.CreateModel( 27 | name='IdentityTag', 28 | fields=[ 29 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 30 | ('name', 
models.CharField(max_length=256, unique=True)), 31 | ], 32 | ), 33 | migrations.AddField( 34 | model_name='identity', 35 | name='tags', 36 | field=models.ManyToManyField(blank=True, to='query.IdentityTag'), 37 | ), 38 | migrations.AlterUniqueTogether( 39 | name='facetag', 40 | unique_together=set([('labeler', 'face')]), 41 | ), 42 | ] 43 | -------------------------------------------------------------------------------- /app/query/migrations/0034_auto_20190711_1442.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2019-07-11 14:42 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0033_auto_20190311_1730'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='FrameSampler', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ('name', models.CharField(max_length=256)), 21 | ], 22 | ), 23 | migrations.AddField( 24 | model_name='frame', 25 | name='sampler', 26 | field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='query.FrameSampler'), 27 | ), 28 | ] 29 | -------------------------------------------------------------------------------- /app/query/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/query/migrations/__init__.py -------------------------------------------------------------------------------- /app/query/templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Esper 5 | 9 | 10 | 11 | 12 | 13 | 14 |
15 | {% block content %}{% endblock %} 16 |
17 | 18 | 19 | -------------------------------------------------------------------------------- /app/query/templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% load render_bundle from webpack_loader %} 3 | 4 | {% block content %} 5 | {% render_bundle 'web' %} 6 | {% endblock %} 7 | -------------------------------------------------------------------------------- /app/query/tests.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | 3 | # Create your tests here. 4 | -------------------------------------------------------------------------------- /app/query/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import url 2 | from . import views 3 | 4 | urlpatterns = [ 5 | url(r'^api/search', views.search, name='search'), 6 | url(r'^api/schema', views.schema, name='schema'), 7 | url(r'^api/subtitles', views.subtitles, name='subtitles'), 8 | url(r'^api/labeled', views.labeled, name='labeled'), 9 | url(r'^api/newthings', views.newthings, name='newthings'), 10 | url(r'^', views.index, name='index') 11 | ] 12 | -------------------------------------------------------------------------------- /app/requirements.app.txt: -------------------------------------------------------------------------------- 1 | jupyter-console==5.2.0 2 | jupyter==1.0.0 3 | psycopg2==2.6.2 4 | django==1.11 5 | gunicorn==19.6.0 6 | django-webpack-loader==0.5.0 7 | progressbar2==3.34.2 8 | django-bulk-update==2.2.0 9 | beakerx 10 | matplotlib==2.1.0 11 | seaborn==0.8.1 12 | django-extensions==1.9.6 13 | jupyter_contrib_nbextensions==0.3.3 14 | jupyter_nbextensions_configurator==0.2.8 15 | autopep8 16 | pysrt==1.1.1 17 | sqlparse==0.2.4 18 | internetarchive 19 | pyspark==2.4.0 20 | pyopenssl 21 | statsmodels 22 | scikit-learn 23 | tqdm 24 | google_images_download 25 | psutil 26 | redis 
27 | msgpack 28 | pickle_cache 29 | iterextras 30 | qgrid 31 | ipyparallel 32 | 33 | #pyro-ppl==0.2.1 34 | #vega==0.5.0 35 | #textacy 36 | #gensim 37 | -------------------------------------------------------------------------------- /app/requirements.base.txt: -------------------------------------------------------------------------------- 1 | dill==0.2.7.1 2 | unionfind==0.0.9 3 | numpy==1.15.4 4 | pandas==0.23.4 5 | torch==0.3.1 6 | -------------------------------------------------------------------------------- /app/scripts/build-tf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | if [ "$build_tf" = "on" ]; then 5 | # Install bazel 6 | apt-get update && apt-get install -y openjdk-8-jdk mlocate 7 | echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | \ 8 | tee /etc/apt/sources.list.d/bazel.list 9 | curl https://bazel.build/bazel-release.pub.gpg | apt-key add - 10 | apt-get update && apt-get install -y bazel 11 | 12 | # Install python deps 13 | # keras_*: https://github.com/bazelbuild/continuous-integration/issues/305 14 | pip3 install six numpy wheel keras_applications keras_preprocessing 15 | 16 | git clone -b v${tf_version} https://github.com/tensorflow/tensorflow/ 17 | cd tensorflow 18 | updatedb 19 | 20 | if [ "$device" = "cpu" ]; then 21 | # TODO(wcrichto): getting internal errors w/ MKL on GCE 22 | 23 | PYTHON_BIN_PATH=$(which python3) \ 24 | PYTHON_LIB_PATH=/usr/local/lib/python3.5/dist-packages \ 25 | TF_NEED_MKL=0 \ 26 | CC_OPT_FLAGS=-march=core-avx2 \ 27 | TF_NEED_GCP=0 \ 28 | TF_NEED_S3=0 \ 29 | TF_NEED_GDR=0 \ 30 | TF_NEED_MPI=0 \ 31 | TF_NEED_HDFS=0 \ 32 | TF_ENABLE_XLA=0 \ 33 | TF_NEED_VERBS=0 \ 34 | TF_NEED_OPENCL=0 \ 35 | TF_NEED_CUDA=0 \ 36 | TF_NEED_IGNITE=0 \ 37 | TF_NEED_OPENCL_SYCL=0 \ 38 | TF_NEED_ROCM=0 \ 39 | TF_DOWNLOAD_CLANG=0 \ 40 | TF_SET_ANDROID_WORKSPACE=0 \ 41 | ./configure 42 | 43 | # ares: 
https://github.com/tensorflow/tensorflow/issues/23402#issuecomment-436932197 44 | bazel build \ 45 | --config=opt \ 46 | --define=grpc_no_ares=true \ 47 | --incompatible_remove_native_http_archive=false \ 48 | //tensorflow/tools/pip_package:build_pip_package 49 | else 50 | echo "No GPU TF support yet" 51 | exit 1 52 | fi 53 | 54 | bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg 55 | pip3 install /tmp/tensorflow_pkg/* 56 | cd .. 57 | rm -rf tensorflow 58 | 59 | else 60 | if [ "$device" = "cpu" ]; then 61 | pip3 install tensorflow==${tf_version}; 62 | else 63 | pip3 install tensorflow-gpu==${tf_version}; 64 | fi 65 | fi 66 | -------------------------------------------------------------------------------- /app/scripts/clear-proxy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # We pass https_proxy from host to container by default. However, if host doesn't have proxy set, then https_proxy 4 | # is set to an empty string in the container. This causes grpc to raise annoying (but benign) errors whenever it's 5 | # run, namely through Scanner. This script unsets those variables if they are set to a blank string to avoid this. 6 | 7 | # Also, note that this script MUST be run in the form: 8 | # $ . ./scripts/clear-proxy 9 | # i.e. with the dot in front, as this ensures that the unset clears variables in the caller's shell, not in just 10 | # this the context of this script. 11 | 12 | if [ -z "$https_proxy" ]; then 13 | unset https_proxy; 14 | unset http_proxy; 15 | fi 16 | -------------------------------------------------------------------------------- /app/scripts/create-sample.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # This script is a reproducible way to create the sample DB provided for the demo. Requires a GPU. 
5 | 6 | rm -rf example-dataset.tar.gz example.mp4 paths db-dump.sql assets/thumbnails scanner_db 7 | youtube-dl "https://www.youtube.com/watch?v=dQw4w9WgXcQ" -f mp4 -o example.mp4 8 | echo "example.mp4" > paths 9 | python manage.py cleanup Video 10 | python manage.py cleanup PersonTrack 11 | esper-run query/datasets/default/ingest.py 12 | python manage.py face_detect paths 13 | python manage.py pose_detect paths 14 | python manage.py gender_scanner paths 15 | python manage.py embed_faces_scanner paths tinyfaces 16 | python manage.py track_face paths 17 | esper-run query/datasets/default/export.py 18 | tar -czf example-dataset.tar.gz paths assets/thumbnails db-dump.sql scanner_db 19 | -------------------------------------------------------------------------------- /app/scripts/export-table.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | TABLE=$1 3 | echo "\copy (SELECT * FROM $TABLE) TO '/app/data/pg/$TABLE.csv' WITH CSV HEADER;" | psql -h db esper will 4 | -------------------------------------------------------------------------------- /app/scripts/generate-protobuf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pushd /app 4 | cargo install protobuf-codegen -q || true 5 | protoc --python_out=esper --rust_out=subserver/src -I=.deps datatypes.proto 6 | popd 7 | -------------------------------------------------------------------------------- /app/scripts/google-setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This is for Docker's use only. Don't run this yourself. 4 | 5 | if [[ ! 
-z $GOOGLE_PROJECT ]]; then 6 | gcloud config set project ${GOOGLE_PROJECT} 7 | gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS 8 | fi 9 | -------------------------------------------------------------------------------- /app/scripts/jupyter-setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | python3 /app/scripts/set-jupyter-password.py 4 | -------------------------------------------------------------------------------- /app/scripts/load-cloud-db.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | pg_dump -h db -U will esper > cloud_db.sql 4 | 5 | # read -p "This will delete everything in your local database. Are you sure? [y/N] " choice 6 | # case "$choice" in 7 | # y|Y ) 8 | # echo "Resetting database with cloud data" 9 | # FLAGS="-h db -U ${DJANGO_DB_USER}" 10 | # #echo "drop database esper; create database esper;" | psql ${FLAGS} -d postgres 11 | # mysqldump --set-gtid-purged=off -h db-cloud -u will ${DB_NAME} | ${MYSQL};; 12 | # * ) 13 | # echo "Not doing anything" 14 | # esac 15 | -------------------------------------------------------------------------------- /app/scripts/set-jupyter-password.py: -------------------------------------------------------------------------------- 1 | import pexpect 2 | import os 3 | 4 | # This script changes Jupyter from using a random hash by default to a custom password, set in configure.py. 
5 | 6 | if 'JUPYTER_PASSWORD' not in os.environ: 7 | exit() 8 | 9 | JUPYTER_DIR = '/root/.jupyter' 10 | if not os.path.isdir(JUPYTER_DIR): 11 | os.mkdir(JUPYTER_DIR) 12 | 13 | p = pexpect.spawn('jupyter notebook password') 14 | p.expect('Enter password: ') 15 | p.sendline(os.environ['JUPYTER_PASSWORD']) 16 | p.sendline(os.environ['JUPYTER_PASSWORD']) 17 | p.read() 18 | -------------------------------------------------------------------------------- /app/scripts/transfer-to-bigquery.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | TABLE=$1 3 | gsutil cp /app/data/pg/$TABLE.csv gs://esper/tmp/$TABLE.csv 4 | bq load --autodetect --source_format=CSV tvnews.$TABLE gs://esper/tmp/$TABLE.csv 5 | gsutil rm gs://esper/tmp/$TABLE.csv 6 | -------------------------------------------------------------------------------- /app/scripts/upload-sample.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | gsutil mv app/example-dataset.tar.gz gs://esper/ 5 | gsutil acl ch -u AllUsers:R gs://esper/example-dataset.tar.gz 6 | -------------------------------------------------------------------------------- /app/test/README.md: -------------------------------------------------------------------------------- 1 | This directory contains tests for `esper/app/esper` using the Python3 2 | [unittest](http://docs.python.org/3/library/unittest.html) framework. 3 | 4 | To add a test, add a new Python script and create classes that inherit from 5 | `unittest.TestCase`. These classes should have functions that start with 6 | `test`. 7 | 8 | To run the tests: 9 | 10 | From the `esper/app` directory, run `python3 -m unittest discover test`. 11 | 12 | This directory currently has no tests, but it's still around for people to add 13 | tests in the future. 
14 | -------------------------------------------------------------------------------- /app/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/test/__init__.py -------------------------------------------------------------------------------- /app/webpack.config.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | const path = require('path'); 4 | const BundleTracker = require('webpack-bundle-tracker'); 5 | const ExtractTextPlugin = require('extract-text-webpack-plugin'); 6 | const webpack = require('webpack'); 7 | 8 | module.exports = { 9 | entry: { 10 | web: './assets/js/web', 11 | styles: './assets/css/main', 12 | }, 13 | 14 | context: __dirname, 15 | 16 | // Include source maps for all compiled files 17 | devtool: 'source-map', 18 | 19 | // Put all output files at assets/bundles 20 | output: { 21 | path: path.resolve('./assets/bundles/'), 22 | filename: "[name].js", 23 | }, 24 | 25 | plugins: [ 26 | // BundleTracker lets Django know about the webpack build status, displaying errors if 27 | // they occur 28 | new BundleTracker({filename: './assets/bundles/webpack-stats.json'}), 29 | 30 | // ExtractTextPlugin allows us to separate CSS output files from JS. 
31 | // See: https://github.com/webpack-contrib/extract-text-webpack-plugin 32 | new ExtractTextPlugin("[name].css"), 33 | ], 34 | 35 | module: { 36 | rules: [{ 37 | test: /\.scss$/, 38 | use: ExtractTextPlugin.extract({ 39 | use: [{ 40 | loader: "css-loader" 41 | }, { 42 | loader: "sass-loader" 43 | }] 44 | }) 45 | }, { 46 | test: /\.css$/, 47 | use: ExtractTextPlugin.extract({ 48 | use: [{ 49 | loader: "css-loader" 50 | }] 51 | }) 52 | }, { 53 | // Stops Bootstrap from complaining 54 | test: /\.(png|woff|woff2|eot|ttf|svg|otf)$/, 55 | loader: 'url-loader?limit=100000' 56 | }, { 57 | // Compile JSX files to JS 58 | test: /\.jsx?$/, 59 | exclude: /node_modules/, 60 | use: [{ 61 | loader: 'babel-loader', 62 | options: { 63 | plugins: ['transform-decorators-legacy'], 64 | presets: ['env', 'stage-0', 'react'] 65 | } 66 | }] 67 | }, { 68 | test: /\.js$/, 69 | use: ["source-map-loader"], 70 | enforce: "pre" 71 | }] 72 | }, 73 | 74 | // TODO: generic way to resolve aliases? 75 | resolve: { 76 | symlinks: false, // https://github.com/npm/npm/issues/5875 77 | modules: ['node_modules', 'assets'], 78 | extensions: ['.js', '.jsx', '.scss', '.css'] 79 | } 80 | }; 81 | -------------------------------------------------------------------------------- /config/google.toml: -------------------------------------------------------------------------------- 1 | [google] 2 | project = "visualdb-1046" 3 | zone = "us-east1-d" 4 | 5 | [storage] 6 | type = "google" 7 | bucket = "esper" 8 | path = "public" 9 | base_image_name = "esper-base" 10 | 11 | [database] 12 | # type = "local" 13 | type = "google" 14 | name = "esper-dev" 15 | user = "will" 16 | password = "foobar" 17 | 18 | [compute] 19 | gpu = false 20 | -------------------------------------------------------------------------------- /config/local.toml: -------------------------------------------------------------------------------- 1 | [storage] 2 | type = "local" # Video/image files stored on the local machine 3 | path = "/var/esper" 
# Currently unused, ignore 4 | base_image_name = "esper-base" # Docker base image name 5 | 6 | [database] 7 | type = "local" # Database is stored/run locally 8 | user = "will" # Username 9 | password = "foobar" # Password 10 | 11 | [compute] 12 | gpu = false # Whether or not you have a GPU on the local machine 13 | -------------------------------------------------------------------------------- /db/.dockerignore: -------------------------------------------------------------------------------- 1 | data -------------------------------------------------------------------------------- /db/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM postgres:9.5 2 | COPY postgresql.conf /var/lib/postgresql/data/postgresql.conf 3 | -------------------------------------------------------------------------------- /guides/dev.md: -------------------------------------------------------------------------------- 1 | # Developing Esper 2 | 3 | If you're developing features for Esper, first talk to [Will](mailto:wcrichto@cs.stanford.edu) and join our [Slack channel](https://cmugraphics.slack.com). 4 | 5 | 6 | ## Frontend 7 | 8 | While editing the SASS or JSX files, use the Webpack watcher: 9 | ``` 10 | dc exec app npm run watch 11 | ``` 12 | 13 | This will automatically rebuild all the frontend files into `assets/bundles` when you change a relevant file. 14 | 15 | > Note: the watching functionality appears to be broken on OS X, so you'll want to dev on a Linux box. I don't think this is fixable. [See this issue](https://github.com/rails/rails/issues/25186). 
16 | -------------------------------------------------------------------------------- /nginx/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx:latest 2 | -------------------------------------------------------------------------------- /nginx/google.conf.template: -------------------------------------------------------------------------------- 1 | # TODO(wcrichto): fallback doesn't work, 404 doesn't get caught because it's behind 2 | # a redirect now. 3 | 4 | location /system_media { 5 | rewrite ^/system_media/(.*)$ /${BUCKET}/$1 break; 6 | proxy_pass https://storage.cloud.google.com; 7 | proxy_pass_request_headers on; 8 | proxy_intercept_errors on; 9 | # error_page 301 302 307 = @handle_redirect; 10 | # error_page 403 404 = @fallback; 11 | } 12 | 13 | location @handle_redirect { 14 | set $orig_loc $upstream_http_location; 15 | proxy_pass $orig_loc; 16 | error_page 403 = @fallback; 17 | error_page 404 = @fallback; 18 | } 19 | -------------------------------------------------------------------------------- /nginx/local.conf.template: -------------------------------------------------------------------------------- 1 | # INSECURE - allows reads of any file in your esper directory 2 | location /system_media { 3 | alias /app; 4 | error_page 403 404 = @fallback; 5 | } 6 | -------------------------------------------------------------------------------- /nginx/nginx.conf.template: -------------------------------------------------------------------------------- 1 | worker_processes 32; 2 | 3 | events { 4 | worker_connections 1024; 5 | use epoll; 6 | multi_accept on; 7 | } 8 | 9 | http { 10 | include /etc/nginx/mime.types; 11 | 12 | server { 13 | listen ${PORT}; 14 | server_name server; 15 | 16 | access_log /dev/stdout; 17 | error_log /dev/stdout debug; 18 | 19 | client_max_body_size 100M; 20 | 21 | location /static { 22 | alias /app/assets; 23 | } 24 | 25 | location /frameserver { 26 | rewrite ^/frameserver/(.*)$ /$1 
break; 27 | proxy_pass http://frameserver:7500; 28 | } 29 | 30 | include /etc/nginx/${ESPER_ENV}.conf; 31 | 32 | location @fallback { 33 | rewrite ^(.*)$ /fallback$1; 34 | proxy_pass http://app:8000; 35 | } 36 | 37 | location / { 38 | uwsgi_read_timeout 300; 39 | proxy_connect_timeout 300; 40 | proxy_send_timeout 300; 41 | proxy_read_timeout 600; 42 | proxy_pass http://app:8000; 43 | } 44 | } 45 | 46 | server { 47 | listen 8888; 48 | server_name notebook; 49 | 50 | client_max_body_size 100M; 51 | 52 | include /etc/nginx/${ESPER_ENV}.conf; 53 | 54 | location /django { 55 | rewrite ^/django/(.*) /$1 break; 56 | proxy_pass http://localhost:80; 57 | } 58 | 59 | location / { 60 | proxy_pass http://app:8888; 61 | 62 | # https://stackoverflow.com/questions/22665809/how-to-configure-ipython-behind-nginx-in-a-subpath/23912400#23912400 63 | proxy_http_version 1.1; 64 | proxy_set_header Upgrade $http_upgrade; 65 | proxy_set_header Connection "upgrade"; 66 | proxy_set_header Origin ""; 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /nginx/subst.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for f in /tmp/*.template 4 | do 5 | full=$(basename $f) 6 | name="${full%.*}" 7 | envsubst '$ESPER_ENV $BUCKET $PORT $DATA_PATH $HOSTNAME' < $f > /etc/nginx/$name 8 | done 9 | 10 | nginx -g "daemon off;" 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | docker-compose 2 | pyyaml 3 | toml 4 | dotmap 5 | -------------------------------------------------------------------------------- /scripts/emergency-reset.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | import subprocess as sp 3 | 4 | app = Flask(__name__) 5 | 6 | page = """ 7 | 8 | 9 | 17 | 18 | 19 | 20 | """ 21 | 
22 | 23 | @app.route("/") 24 | def index(): 25 | return page 26 | 27 | 28 | @app.route('/reset') 29 | def reset(): 30 | sp.check_call( 31 | 'docker-compose stop -t 0 && docker-compose down && docker-compose up -d', shell=True) 32 | return "" 33 | -------------------------------------------------------------------------------- /scripts/gce-gpu-setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Install NVIDIA GPU driver 4 | # https://cloud.google.com/compute/docs/gpus/add-gpus#install-driver-script 5 | 6 | echo "Checking for CUDA and installing." 7 | # Check for CUDA and try to install. 8 | if ! dpkg-query -W cuda-9-0; then 9 | # The 16.04 installer works with 16.10. 10 | curl -O http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-repo-ubuntu1604_9.0.176-1_amd64.deb 11 | dpkg -i ./cuda-repo-ubuntu1604_9.0.176-1_amd64.deb 12 | apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub 13 | apt-get update 14 | apt-get install cuda-9-0 -y 15 | fi 16 | # Enable persistence mode 17 | nvidia-smi -pm 1 18 | 19 | 20 | # Install nvidia-docker 21 | # https://github.com/NVIDIA/nvidia-docker#ubuntu-140416041804-debian-jessiestretch 22 | 23 | # Add the package repositories 24 | curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | \ 25 | sudo apt-key add - 26 | distribution=$(. 
/etc/os-release;echo $ID$VERSION_ID) 27 | curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \ 28 | sudo tee /etc/apt/sources.list.d/nvidia-docker.list 29 | sudo apt-get update 30 | 31 | # Install nvidia-docker2 and reload the Docker daemon configuration 32 | sudo apt-get install -y nvidia-docker2 33 | sudo pkill -SIGHUP dockerd 34 | -------------------------------------------------------------------------------- /scripts/get-bucket-size.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | bq query -q --format=json "select last(storage_byte_hours) from storageanalysis.storage" | jq '(.[0].f0_ | tonumber) / (1024 * 1024 * 1024 * 1024 * 24)' 3 | echo "terabytes" 4 | -------------------------------------------------------------------------------- /scripts/jupyter-token.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker-compose logs app | grep "?token" | sed 's/.*\?token=\(.*\)/\1/p' | tail -n 1 3 | -------------------------------------------------------------------------------- /scripts/run-app-tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Run tests in app/tests (need to be in the app directory) 4 | cd app 5 | python3 -m unittest discover test 6 | -------------------------------------------------------------------------------- /scripts/serve-reset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pip3 install flask 4 | FLASK_APP=scripts/emergency-reset.py flask run --host=0.0.0.0 --port=9999 5 | -------------------------------------------------------------------------------- /scripts/setup-db-csv-dir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker-compose exec db bash -c "mkdir -p /app/pg && chown postgres /app/pg" 4 | 
-------------------------------------------------------------------------------- /scripts/transfer-db.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SERVICE_ACCOUNT=$(gcloud sql instances describe esper-shared --format=json | jq -r '.serviceAccountEmailAddress') 4 | 5 | gcloud sql instances export esper-public-dev gs://esper/tmp-dump.sql --database=esper 6 | gsutil acl ch -u $SERVICE_ACCOUNT:W gs://esper 7 | gsutil acl ch -u $SERVICE_ACCOUNT:R gs://esper/tmp-dump.sql 8 | cat <