├── frontend ├── src │ ├── App.css │ ├── components │ │ ├── img │ │ │ ├── fb.png │ │ │ ├── twitter.png │ │ │ ├── instagram.png │ │ │ ├── linkedin.png │ │ │ └── chevron-left-solid.svg │ │ ├── H1.js │ │ ├── H2.js │ │ ├── FaceHitAnimation.css │ │ ├── InterestFooter.jsx │ │ ├── BackButton.js │ │ ├── PostsCard.jsx │ │ ├── Button.jsx │ │ ├── StatsCard.jsx │ │ ├── ProfileCard.jsx │ │ ├── BioCard.jsx │ │ ├── IGPost.js │ │ ├── LocationCard.jsx │ │ ├── EndButton.jsx │ │ ├── InterestCard.jsx │ │ ├── Slideshow.jsx │ │ ├── Form.js │ │ ├── Popup.jsx │ │ ├── Start.js │ │ ├── FaceHitAnimation.js │ │ └── camera-feed.js │ ├── notfound.js │ ├── static │ │ └── fonts │ │ │ └── FontsFree-Net-SFProDisplay-Regular.ttf │ ├── creativeCode.css │ ├── App.test.js │ ├── css │ │ └── endButton.css │ ├── pages │ │ ├── Greeting.jsx │ │ ├── GroupIntent.jsx │ │ ├── endscreen.jsx │ │ ├── SearchProfile.jsx │ │ └── ExampleProfileSelection.jsx │ ├── index.js │ └── App.js ├── public │ ├── robots.txt │ ├── favicon.ico │ ├── logo192.png │ ├── logo512.png │ ├── Barlow-SemiBold.otf │ ├── favicon │ │ ├── favicon.ico │ │ ├── apple-icon.png │ │ ├── favicon-16x16.png │ │ ├── favicon-32x32.png │ │ ├── favicon-96x96.png │ │ ├── ms-icon-70x70.png │ │ ├── apple-icon-57x57.png │ │ ├── apple-icon-60x60.png │ │ ├── apple-icon-72x72.png │ │ ├── apple-icon-76x76.png │ │ ├── ms-icon-144x144.png │ │ ├── ms-icon-150x150.png │ │ ├── ms-icon-310x310.png │ │ ├── android-icon-36x36.png │ │ ├── android-icon-48x48.png │ │ ├── android-icon-72x72.png │ │ ├── android-icon-96x96.png │ │ ├── apple-icon-114x114.png │ │ ├── apple-icon-120x120.png │ │ ├── apple-icon-144x144.png │ │ ├── apple-icon-152x152.png │ │ ├── apple-icon-180x180.png │ │ ├── android-icon-144x144.png │ │ ├── android-icon-192x192.png │ │ ├── apple-icon-precomposed.png │ │ ├── browserconfig.xml │ │ └── manifest.json │ ├── manifest.json │ ├── background.js │ └── index.html ├── readme.pdf ├── package.json └── README.md ├── .vscode └── settings.json ├── twitter ├── scraper │ ├── twitterscraper │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── insert_seed.py │ │ ├── posts_scraper.py │ │ ├── users_scraper.py │ │ ├── following_scraper.py │ │ ├── followers_scraper.py │ │ └── scraper_manager.py │ ├── img │ │ └── twitter_scraper-architecture.jpg │ ├── Dockerfile │ ├── pyproject.toml │ └── requirements.txt ├── models │ └── gorm.go ├── debezium │ ├── Dockerfile │ └── register-postgres.json ├── inserter │ ├── posts │ │ ├── Dockerfile │ │ ├── main │ │ │ └── main.go │ │ └── inserter.go │ └── users │ │ ├── Dockerfile │ │ ├── main │ │ └── main.go │ │ └── inserter.go └── filter │ └── user_names │ ├── Dockerfile │ ├── main.go │ └── filter_test.go ├── insta ├── db │ ├── migrations │ │ ├── 000006_set_replica_identity_full.down.sql │ │ ├── 000008_set_replica_identity_full.down.sql │ │ ├── 000003_create_posts_table.down.sql │ │ ├── 000001_create_users_table.down.sql │ │ ├── 000004_create_comments_table.down.sql │ │ ├── 000013_create_post_likes.down.sql │ │ ├── 000005_create_post_tagged_users_table.down.sql │ │ ├── 000009_add_index_to_posts_user_id_ref.down.sql │ │ ├── 000009_add_index_to_posts_user_id_ref.up.sql │ │ ├── 000012_add_index_to_post_tagged_users_ref.down.sql │ │ ├── 000014_add_index_to_post_likes_user_id_ref.down.sql │ │ ├── 000015_add_index_to_post_likes_post_id_ref.down.sql │ │ ├── 000010_add_index_to_insta_users_user_name_ref.down.sql │ │ ├── 000007_add_internal_picture_url_to_posts.down.sql │ │ ├── 000016_add_index_to_post_tagged_users_post_id_ref.down.sql │ │ ├── 000007_add_internal_picture_url_to_posts.up.sql │ │ ├── 000014_add_index_to_post_likes_user_id_ref.up.sql │ │ ├── 000015_add_index_to_post_likes_post_id_ref.up.sql │ │ ├── 000010_add_index_to_insta_users_user_name_ref.up.sql │ │ ├── 000012_add_index_to_post_tagged_users_ref.up.sql │ │ ├── 000016_add_index_to_post_tagged_users_post_id_ref.up.sql │ │ ├── 000011_add_index_to_follows_ref.down.sql │ │ ├── 000011_add_index_to_follows_ref.up.sql │ │ ├── 000002_create_follows_table.down.sql │ │ ├── 000006_set_replica_identity_full.up.sql │ │ ├── 000008_set_replica_identity_full.up.sql │ │ ├── 000001_create_users_table.up.sql │ │ ├── 000005_create_post_tagged_users_table.up.sql │ │ ├── 000013_create_post_likes.up.sql │ │ ├── 000004_create_comments_table.up.sql │ │ ├── 000002_create_follows_table.up.sql │ │ └── 000003_create_posts_table.up.sql │ ├── Dockerfile │ ├── kubernetes │ │ ├── Dockerfile │ │ ├── kube-register-postgres.json │ │ └── kube-start-postgres.sh │ ├── start-postgres.sh │ └── register-postgres.json ├── models │ ├── post_download_job.go │ └── faces.go ├── posts_face-detection │ ├── worker.Dockerfile │ └── main │ │ └── main.go ├── filter │ ├── post_pictures │ │ ├── Dockerfile │ │ ├── main.go │ │ └── filter_test.go │ ├── post_face-recon │ │ ├── Dockerfile │ │ └── main.go │ └── user_names │ │ ├── Dockerfile │ │ ├── main.go │ │ └── filter_test.go ├── inserter │ ├── posts_face │ │ ├── Dockerfile │ │ └── main │ │ │ └── main.go │ ├── postgres │ │ ├── Dockerfile │ │ └── main │ │ │ └── main.go │ ├── likes │ │ ├── Dockerfile │ │ └── main │ │ │ └── main.go │ ├── posts │ │ ├── Dockerfile │ │ └── main │ │ │ └── main.go │ ├── comments │ │ ├── Dockerfile │ │ └── main │ │ │ └── main.go │ └── neo4j │ │ ├── posts │ │ ├── Dockerfile │ │ └── main.go │ │ ├── user │ │ ├── Dockerfile │ │ └── main.go │ │ └── tagged_users │ │ ├── Dockerfile │ │ └── main.go ├── pics-downloader │ ├── Dockerfile │ └── main │ │ └── main.go ├── indexer │ ├── faces │ │ ├── Dockerfile │ │ └── insta_faces_indexer.go │ ├── posts │ │ ├── Dockerfile │ │ └── insta_posts_indexer.go │ ├── users │ │ ├── Dockerfile │ │ └── insta_users_indexer.go │ └── comments │ │ ├── Dockerfile │ │ └── insta_comments_indexer.go └── scraper │ ├── likes │ ├── Dockerfile │ ├── main │ │ └── main.go │ └── insta-model.go │ ├── posts │ ├── Dockerfile │ └── main │ │ └── main.go │ ├── user │ ├── Dockerfile │ └── main │ │ └── main.go │ └── comments │ ├── Dockerfile │ └── main │ └── main.go ├── faces ├── .dockerignore ├── Makefile ├── requirements.txt ├── metrics.py ├── proto │ └── recognizer.proto ├── Dockerfile ├── recognitiontest │ └── main.go ├── server.py ├── recognizer_pb2_grpc.py └── recognizer.py ├── docs ├── architecture.png ├── insta_schema.png └── twitter_schema.png ├── elastic ├── indexer │ └── model.go ├── indicies.go ├── utils.go ├── models │ ├── insta.go │ └── face.go ├── build │ └── Dockerfile ├── search │ └── facetest │ │ └── main.go └── mapping.go ├── api ├── envoy-proxy │ ├── Dockerfile │ └── envoy.yaml ├── grpcserver │ ├── Dockerfile │ └── main │ │ └── main.go ├── README.md └── proto │ └── usersearch.proto ├── .gitignore ├── aws_service ├── main │ └── main.go ├── proto │ └── renewingAddress.proto └── Dockerfile ├── .github ├── ISSUE_TEMPLATE │ └── default.md └── pull_request_template.md ├── .dockerignore ├── cli ├── Dockerfile └── main │ └── main.go ├── face-recognition ├── Dockerfile └── main │ └── main.go ├── scraper-client ├── scraper-client.go ├── scraper-config.go └── simple-scraper-client.go ├── neo4j ├── create-import-user-json │ ├── Dockerfile │ └── main │ │ └── main.go └── inserter │ └── inserter.go ├── config ├── postgres-config.go └── s3-config.go ├── utils ├── neo4j-utils.go └── utils.go ├── tools ├── Dockerfile └── .zshrc ├── db ├── utils.go └── README.md ├── Makefile ├── nlp └── frequency-analyzer │ ├── cities.json │ └── main │ └── main.go ├── go.mod ├── kafka └── changestream │ ├── change_message.go │ └── filter.go ├── http_header-generator └── generator.go ├── imgproxy └── urlbuilder.go ├── worker ├── worker.go └── builder.go ├── service └── executor.go └── CONTRIBUTING.md /frontend/src/App.css: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /twitter/scraper/twitterscraper/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /insta/db/migrations/000006_set_replica_identity_full.down.sql: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /insta/db/migrations/000008_set_replica_identity_full.down.sql: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /insta/db/migrations/000003_create_posts_table.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS posts; -------------------------------------------------------------------------------- /insta/db/migrations/000001_create_users_table.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS users; 2 | -------------------------------------------------------------------------------- /faces/.dockerignore: -------------------------------------------------------------------------------- 1 | .dockerignore 2 | __pycache__ 3 | */__pycache__ 4 | env 5 | images 6 | -------------------------------------------------------------------------------- /frontend/public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | -------------------------------------------------------------------------------- /insta/db/migrations/000004_create_comments_table.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS comments; 2 | -------------------------------------------------------------------------------- /insta/db/migrations/000013_create_post_likes.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS post_likes; 2 | -------------------------------------------------------------------------------- /frontend/readme.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/readme.pdf -------------------------------------------------------------------------------- /docs/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/docs/architecture.png -------------------------------------------------------------------------------- /docs/insta_schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/docs/insta_schema.png -------------------------------------------------------------------------------- /docs/twitter_schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/docs/twitter_schema.png -------------------------------------------------------------------------------- /frontend/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon.ico -------------------------------------------------------------------------------- /frontend/public/logo192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/logo192.png -------------------------------------------------------------------------------- /frontend/public/logo512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/logo512.png -------------------------------------------------------------------------------- /insta/db/migrations/000005_create_post_tagged_users_table.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS post_tagged_users; 2 | -------------------------------------------------------------------------------- /insta/db/migrations/000009_add_index_to_posts_user_id_ref.down.sql: -------------------------------------------------------------------------------- 1 | DROP INDEX IF EXISTS posts_user_id_index; 2 | -------------------------------------------------------------------------------- /faces/Makefile: -------------------------------------------------------------------------------- 1 | gen: 2 | python -m grpc_tools.protoc -Iproto --python_out=. --grpc_python_out=. proto/recognizer.proto 3 | -------------------------------------------------------------------------------- /insta/db/migrations/000009_add_index_to_posts_user_id_ref.up.sql: -------------------------------------------------------------------------------- 1 | CREATE INDEX posts_user_id_index ON posts (user_id); 2 | -------------------------------------------------------------------------------- /insta/db/migrations/000012_add_index_to_post_tagged_users_ref.down.sql: -------------------------------------------------------------------------------- 1 | DROP INDEX IF EXISTS post_tagged_users_index; 2 | -------------------------------------------------------------------------------- /insta/db/migrations/000014_add_index_to_post_likes_user_id_ref.down.sql: -------------------------------------------------------------------------------- 1 | DROP INDEX IF EXISTS post_likes_user_id_index; 2 | -------------------------------------------------------------------------------- /insta/db/migrations/000015_add_index_to_post_likes_post_id_ref.down.sql: -------------------------------------------------------------------------------- 1 | DROP INDEX IF EXISTS post_likes_post_id_index; 2 | -------------------------------------------------------------------------------- /frontend/src/components/img/fb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/src/components/img/fb.png -------------------------------------------------------------------------------- /frontend/src/notfound.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | const Notfound = () =>

Not found

3 | export default Notfound 4 | -------------------------------------------------------------------------------- /insta/db/migrations/000010_add_index_to_insta_users_user_name_ref.down.sql: -------------------------------------------------------------------------------- 1 | DROP INDEX IF EXISTS insta_users_user_name_index; 2 | -------------------------------------------------------------------------------- /frontend/public/Barlow-SemiBold.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/Barlow-SemiBold.otf -------------------------------------------------------------------------------- /frontend/public/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/favicon.ico -------------------------------------------------------------------------------- /insta/db/migrations/000007_add_internal_picture_url_to_posts.down.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE posts 2 | DROP COLUMN internal_picture_url; 3 | -------------------------------------------------------------------------------- /insta/db/migrations/000016_add_index_to_post_tagged_users_post_id_ref.down.sql: -------------------------------------------------------------------------------- 1 | DROP INDEX IF EXISTS tagged_posts_post_id_index; 2 | -------------------------------------------------------------------------------- /elastic/indexer/model.go: -------------------------------------------------------------------------------- 1 | package indexer 2 | 3 | type BulkIndexDoc struct { 4 | DocumentId string 5 | BulkOperation string 6 | } 7 | -------------------------------------------------------------------------------- /frontend/public/favicon/apple-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon.png -------------------------------------------------------------------------------- /frontend/src/components/img/twitter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/src/components/img/twitter.png -------------------------------------------------------------------------------- /insta/db/migrations/000007_add_internal_picture_url_to_posts.up.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE posts 2 | ADD COLUMN internal_picture_url VARCHAR; 3 | -------------------------------------------------------------------------------- /insta/db/migrations/000014_add_index_to_post_likes_user_id_ref.up.sql: -------------------------------------------------------------------------------- 1 | CREATE INDEX post_likes_user_id_index ON post_likes(user_id); 2 | -------------------------------------------------------------------------------- /insta/db/migrations/000015_add_index_to_post_likes_post_id_ref.up.sql: -------------------------------------------------------------------------------- 1 | CREATE INDEX post_likes_post_id_index ON post_likes(post_id); 2 | -------------------------------------------------------------------------------- /frontend/public/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /frontend/public/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /frontend/public/favicon/favicon-96x96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/favicon-96x96.png -------------------------------------------------------------------------------- /frontend/public/favicon/ms-icon-70x70.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/ms-icon-70x70.png -------------------------------------------------------------------------------- /frontend/src/components/img/instagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/src/components/img/instagram.png -------------------------------------------------------------------------------- /frontend/src/components/img/linkedin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/src/components/img/linkedin.png -------------------------------------------------------------------------------- /insta/db/migrations/000010_add_index_to_insta_users_user_name_ref.up.sql: -------------------------------------------------------------------------------- 1 | CREATE INDEX insta_users_user_name_index ON users (user_name); 2 | -------------------------------------------------------------------------------- /insta/db/migrations/000012_add_index_to_post_tagged_users_ref.up.sql: -------------------------------------------------------------------------------- 1 | CREATE INDEX post_tagged_users_index ON post_tagged_users (user_id); 2 | -------------------------------------------------------------------------------- /frontend/public/favicon/apple-icon-57x57.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-57x57.png -------------------------------------------------------------------------------- /frontend/public/favicon/apple-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-60x60.png -------------------------------------------------------------------------------- /frontend/public/favicon/apple-icon-72x72.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-72x72.png -------------------------------------------------------------------------------- /frontend/public/favicon/apple-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-76x76.png -------------------------------------------------------------------------------- /frontend/public/favicon/ms-icon-144x144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/ms-icon-144x144.png -------------------------------------------------------------------------------- /frontend/public/favicon/ms-icon-150x150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/ms-icon-150x150.png -------------------------------------------------------------------------------- /frontend/public/favicon/ms-icon-310x310.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/ms-icon-310x310.png -------------------------------------------------------------------------------- /frontend/public/favicon/android-icon-36x36.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/android-icon-36x36.png -------------------------------------------------------------------------------- /frontend/public/favicon/android-icon-48x48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/android-icon-48x48.png -------------------------------------------------------------------------------- /frontend/public/favicon/android-icon-72x72.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/android-icon-72x72.png -------------------------------------------------------------------------------- /frontend/public/favicon/android-icon-96x96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/android-icon-96x96.png -------------------------------------------------------------------------------- /frontend/public/favicon/apple-icon-114x114.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-114x114.png -------------------------------------------------------------------------------- /frontend/public/favicon/apple-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-120x120.png -------------------------------------------------------------------------------- /frontend/public/favicon/apple-icon-144x144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-144x144.png -------------------------------------------------------------------------------- /frontend/public/favicon/apple-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-152x152.png -------------------------------------------------------------------------------- /frontend/public/favicon/apple-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-180x180.png -------------------------------------------------------------------------------- /insta/db/migrations/000016_add_index_to_post_tagged_users_post_id_ref.up.sql: -------------------------------------------------------------------------------- 1 | CREATE INDEX tagged_posts_post_id_index ON post_tagged_users (post_id); 2 | -------------------------------------------------------------------------------- /api/envoy-proxy/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM envoyproxy/envoy-dev:4e858f17fe08224c9c089240908ccd0c518e01a7 2 | COPY api/envoy-proxy/envoy.yaml /etc/envoy/envoy.yaml 3 | -------------------------------------------------------------------------------- /frontend/public/favicon/android-icon-144x144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/android-icon-144x144.png -------------------------------------------------------------------------------- /frontend/public/favicon/android-icon-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/android-icon-192x192.png -------------------------------------------------------------------------------- /insta/db/migrations/000011_add_index_to_follows_ref.down.sql: -------------------------------------------------------------------------------- 1 | DROP INDEX IF EXISTS folows_from_id_index; 2 | DROP INDEX IF EXISTS folows_to_id_index; 3 | 4 | -------------------------------------------------------------------------------- /frontend/public/favicon/apple-icon-precomposed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-precomposed.png -------------------------------------------------------------------------------- /twitter/scraper/img/twitter_scraper-architecture.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/twitter/scraper/img/twitter_scraper-architecture.jpg -------------------------------------------------------------------------------- /insta/db/migrations/000011_add_index_to_follows_ref.up.sql: -------------------------------------------------------------------------------- 1 | CREATE INDEX folows_from_id_index ON follows (from_id); 2 | CREATE INDEX folows_to_id_index ON follows (to_id); 3 | -------------------------------------------------------------------------------- /frontend/src/static/fonts/FontsFree-Net-SFProDisplay-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/src/static/fonts/FontsFree-Net-SFProDisplay-Regular.ttf -------------------------------------------------------------------------------- /elastic/indicies.go: -------------------------------------------------------------------------------- 1 | package elastic 2 | 3 | const ( 4 | CommentsIndex = "insta_comments" 5 | FacesIndex = "faces" 6 | PostsIndex = "insta_posts" 7 | UsersIndex = "insta_users" 8 | ) 9 | -------------------------------------------------------------------------------- /insta/db/migrations/000002_create_follows_table.down.sql: -------------------------------------------------------------------------------- 1 | DROP INDEX IF EXISTS follows_from_id_fkey; 2 | DROP INDEX IF EXISTS follows_to_id_fkey; 3 | DROP INDEX IF EXISTS follows_uniq_relationship_index; 4 | DROP TABLE IF EXISTS follows; 5 | -------------------------------------------------------------------------------- /insta/db/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM migrate/migrate:v4.6.2 2 | 3 | RUN apk add --no-cache --upgrade \ 4 | bash \ 5 | curl 6 | 7 | WORKDIR /src 8 | COPY insta/db/ db/ 9 | 10 | ENTRYPOINT [ "bash" ] 11 | CMD [ "db/start-postgres.sh" ] 12 | -------------------------------------------------------------------------------- /insta/db/migrations/000006_set_replica_identity_full.up.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE posts REPLICA IDENTITY FULL; 2 | ALTER TABLE users REPLICA IDENTITY FULL; 3 | ALTER TABLE follows REPLICA IDENTITY FULL; 4 | ALTER TABLE comments REPLICA IDENTITY FULL; 5 | -------------------------------------------------------------------------------- /insta/db/migrations/000008_set_replica_identity_full.up.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE posts REPLICA IDENTITY FULL; 2 | ALTER TABLE users REPLICA IDENTITY FULL; 3 | ALTER TABLE follows REPLICA IDENTITY FULL; 4 | ALTER TABLE comments REPLICA IDENTITY FULL; 5 | -------------------------------------------------------------------------------- /insta/models/post_download_job.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | // PostDownloadJob represents a post which should be downloaded 4 | type PostDownloadJob struct { 5 | PostID int `json:"post_id"` 6 | PictureURL string `json:"picture_url"` 7 | } 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE 2 | .idea/ 3 | .vscode/ 4 | 5 | # Python 6 | __pycache__/ 7 | *.pyc 8 | 9 | .DS_Store 10 | .vscode 11 | 12 | node_modules 13 | frontend/yarn.lock 14 | frontend/build 15 | 16 | env 17 | faces/images 18 | faces/img.jpg 19 | 20 | -------------------------------------------------------------------------------- /frontend/src/components/H1.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | 3 | function H1(props) { 4 | return ( 5 |
6 |

{props.children}

7 |
8 | ); 9 | } 10 | 11 | export default H1; 12 | -------------------------------------------------------------------------------- /insta/db/migrations/000001_create_users_table.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS users( 2 | id serial PRIMARY KEY, 3 | user_name VARCHAR UNIQUE NOT NULL, 4 | real_name VARCHAR, 5 | avatar_url VARCHAR, 6 | bio text, 7 | crawl_ts integer 8 | ); 9 | -------------------------------------------------------------------------------- /frontend/src/components/H2.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | 3 | function H2(props) { 4 | return ( 5 |
6 |

{props.children}

7 |
8 | ); 9 | } 10 | 11 | export default H2; 12 | -------------------------------------------------------------------------------- /insta/db/migrations/000005_create_post_tagged_users_table.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS post_tagged_users 2 | ( 3 | id serial, 4 | post_id INTEGER REFERENCES posts (id), 5 | user_id INTEGER REFERENCES users (id), 6 | PRIMARY KEY (post_id, user_id) 7 | ); 8 | -------------------------------------------------------------------------------- /insta/db/migrations/000013_create_post_likes.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS post_likes 2 | ( 3 | id serial PRIMARY KEY, 4 | like_id VARCHAR UNIQUE NOT NULL, 5 | user_id INTEGER REFERENCES users (id), 6 | post_id INTEGER REFERENCES posts(id) 7 | ); 8 | -------------------------------------------------------------------------------- /twitter/models/gorm.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import "time" 4 | 5 | // GormModelWithoutID is an alternative definition for gorm.Model without an ID 6 | type GormModelWithoutID struct { 7 | CreatedAt time.Time 8 | UpdatedAt time.Time 9 | DeletedAt *time.Time 10 | } 11 | -------------------------------------------------------------------------------- /frontend/src/creativeCode.css: -------------------------------------------------------------------------------- 1 | .white-background { 2 | position: fixed; 3 | z-index: 1; 4 | top: 0; 5 | left: 0; 6 | right: 0; 7 | bottom: 0; 8 | background: white; 9 | } 10 | 11 | .container { 12 | position: relative; 13 | z-index: 2; 14 | } -------------------------------------------------------------------------------- /insta/db/migrations/000004_create_comments_table.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS comments( 2 | id serial PRIMARY KEY, 3 | post_id INTEGER REFERENCES posts(id), 4 | comment_id VARCHAR UNIQUE NOT NULL, 5 | comment_text text, 6 | owner_user_id INTEGER REFERENCES users(id) 7 | ); 8 | -------------------------------------------------------------------------------- /insta/db/migrations/000002_create_follows_table.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS follows( 2 | id serial PRIMARY KEY, 3 | from_id INTEGER REFERENCES users(id), 4 | to_id INTEGER REFERENCES users(id) 5 | ); 6 | 7 | CREATE UNIQUE INDEX follows_uniq_relationship_index ON follows (from_id, to_id); 8 | -------------------------------------------------------------------------------- /frontend/src/App.test.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom'; 3 | import App from './App'; 4 | 5 | it('renders without crashing', () => { 6 | const div = document.createElement('div'); 7 | ReactDOM.render(, div); 8 | ReactDOM.unmountComponentAtNode(div); 9 | }); 10 | -------------------------------------------------------------------------------- /frontend/src/components/FaceHitAnimation.css: -------------------------------------------------------------------------------- 1 | .BackgroundImage { 2 | position: fixed; 3 | width: 600px; 4 | } 5 | 6 | .BackgroundImage-center { 7 | width: 800px; 8 | height: 800px; 9 | position: fixed; 10 | top: 50%; 11 | left: 50%; 12 | margin-left: -400px; 13 | margin-top: -400px; 14 | } 15 | -------------------------------------------------------------------------------- /insta/db/migrations/000003_create_posts_table.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS posts 2 | ( 3 | id serial PRIMARY KEY, 4 | user_id INTEGER REFERENCES users (id), 5 | post_id VARCHAR UNIQUE NOT NULL, 6 | short_code VARCHAR, 7 | picture_url VARCHAR, 8 | caption VARCHAR 9 | ); 10 | -------------------------------------------------------------------------------- /frontend/public/favicon/browserconfig.xml: -------------------------------------------------------------------------------- 1 | 2 | #ffffff -------------------------------------------------------------------------------- /aws_service/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/codeuniversity/smag-mvp/aws_service" 5 | "github.com/codeuniversity/smag-mvp/utils" 6 | ) 7 | 8 | func main() { 9 | grpcPort := utils.GetStringFromEnvWithDefault("GRPC_PORT", "9900") 10 | s := aws_service.New(grpcPort) 11 | s.Listen() 12 | } 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/default.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Default 3 | about: Shortcut for writing a new issue 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | __Description__ 11 | 12 | ... 13 | 14 | __Checklist__ 15 | 16 | - [ ] ... 17 | 18 | __Attachments__ 19 | 20 | - [Title](URL) 21 | - ![Alt](Link to image) 22 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # IDE stuf 2 | .vscode/ 3 | .idea/ 4 | 5 | # git stuff 6 | .gitignore 7 | *.md 8 | 9 | # DOCKER stuff 10 | docker-compose.yml 11 | */Dockerfile 12 | Dockerfile 13 | *.Dockerfile 14 | .dockerignore 15 | 16 | # PYTHON stuff 17 | __pycache__/ 18 | *.pyc 19 | *faces/env/* 20 | *faces/images/* 21 | frontend 22 | Makefile 23 | -------------------------------------------------------------------------------- /insta/posts_face-detection/worker.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM codesmag/opencv AS builder 2 | WORKDIR /app 3 | COPY go.mod go.sum ./ 4 | RUN go mod download 5 | COPY . . 6 | RUN GOOS=linux go build -o worker.bin insta/posts_face-detection/main/main.go 7 | COPY insta/posts_face-detection/haarcascade_frontalface_alt.xml . 8 | 9 | CMD ["./worker.bin"] 10 | -------------------------------------------------------------------------------- /twitter/debezium/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | RUN apt-get update && apt-get install -y \ 4 | curl 5 | 6 | WORKDIR /src 7 | COPY register-postgres.json . 8 | 9 | SHELL [ "bash" ] 10 | CMD [ "curl", "-i", "-X", "POST", "-H", "Accept:application/json", "-H", "Content-Type:application/json", "http://connect:8083/connectors/", "-d", "@register-postgres.json" ] 11 | -------------------------------------------------------------------------------- /faces/requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2019.9.11 2 | chardet==3.0.4 3 | Click==7.0 4 | dlib==19.9.0 5 | face-recognition==1.2.3 6 | face-recognition-models==0.3.0 7 | grpcio==1.18.0 8 | grpcio-tools==1.18.0 9 | idna==2.8 10 | numpy==1.17.4 11 | Pillow==6.2.1 12 | prometheus-client==0.7.1 13 | protobuf==3.10.0 14 | pycodestyle==2.5.0 15 | requests==2.22.0 16 | six==1.13.0 17 | urllib3==1.25.7 18 | -------------------------------------------------------------------------------- /insta/filter/post_pictures/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod go.sum ./ 4 | RUN go mod download 5 | COPY . . 6 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o filter insta/filter/post_pictures/main.go 7 | 8 | FROM alpine 9 | RUN apk --no-cache add ca-certificates 10 | WORKDIR /app 11 | COPY --from=builder /app/filter . 12 | CMD ["./filter"] 13 | -------------------------------------------------------------------------------- /insta/filter/post_face-recon/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod go.sum ./ 4 | RUN go mod download 5 | COPY . . 6 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o filter insta/filter/post_face-recon/main.go 7 | 8 | FROM alpine 9 | RUN apk --no-cache add ca-certificates 10 | WORKDIR /app 11 | COPY --from=builder /app/filter . 12 | CMD ["./filter"] 13 | -------------------------------------------------------------------------------- /insta/inserter/posts_face/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod go.sum ./ 4 | RUN go mod download 5 | COPY . . 6 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o inserter insta/inserter/posts_face/main/main.go 7 | 8 | FROM alpine 9 | RUN apk --no-cache add ca-certificates 10 | WORKDIR /app 11 | COPY --from=builder /app/inserter . 12 | CMD ["./inserter"] 13 | -------------------------------------------------------------------------------- /insta/pics-downloader/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod go.sum ./ 4 | RUN go mod download 5 | COPY . . 6 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o downloader insta/pics-downloader/main/main.go 7 | 8 | FROM alpine 9 | RUN apk --no-cache add ca-certificates 10 | WORKDIR /app 11 | COPY --from=builder /app/downloader . 12 | CMD ["./downloader"] 13 | -------------------------------------------------------------------------------- /cli/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod go.sum ./ 4 | RUN go mod download 5 | COPY . . 6 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o smag-cli cli/main/main.go 7 | 8 | FROM alpine 9 | RUN apk --no-cache add ca-certificates 10 | WORKDIR /app 11 | COPY --from=builder /app/smag-cli . 12 | ENTRYPOINT ["./smag-cli"] 13 | CMD [ "" ] # optional explicit statement 14 | -------------------------------------------------------------------------------- /faces/metrics.py: -------------------------------------------------------------------------------- 1 | from prometheus_client import Counter, Histogram 2 | 3 | request_counter = Counter( 4 | 'request_count', 'times an endpoint was called', labelnames=['endpoint']) 5 | 6 | request_latency_histogram = Histogram('request_latency_seconds', 'the time it takes for an endpoint to respond', 7 | labelnames=['endpoint'], buckets=(.01, .05, .1, .5, 1.0, 2.0, 4.0, 8.0, 10.0)) 8 | -------------------------------------------------------------------------------- /face-recognition/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod . 4 | COPY go.sum . 5 | RUN go mod download 6 | COPY . . 7 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o worker.bin face-recognition/main/main.go 8 | 9 | FROM alpine 10 | RUN apk --no-cache add ca-certificates 11 | RUN mkdir /app 12 | COPY --from=builder /app/worker.bin /app 13 | WORKDIR /app 14 | CMD ["./worker.bin"] 15 | -------------------------------------------------------------------------------- /frontend/src/components/InterestFooter.jsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import PropTypes from "prop-types"; 3 | 4 | const InterestFooter = props => { 5 | return ( 6 |
7 |

{props.title}

8 |

{props.details}

9 |
10 | ); 11 | }; 12 | 13 | InterestFooter.propTypes = { 14 | title: PropTypes.string 15 | }; 16 | 17 | export default InterestFooter; 18 | -------------------------------------------------------------------------------- /insta/db/kubernetes/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine 2 | RUN apk add --no-cache curl postgresql-client tar bash jq 3 | RUN wget https://github.com/golang-migrate/migrate/releases/download/v4.6.2/migrate.linux-amd64.tar.gz && tar -xf migrate.linux-amd64.tar.gz 4 | RUN mv migrate.linux-amd64 usr/bin/migrate 5 | WORKDIR /script 6 | COPY insta/db/migrations db/migrations 7 | COPY insta/db/kubernetes . 8 | ENTRYPOINT ["bash", "kube-start-postgres.sh"] 9 | -------------------------------------------------------------------------------- /api/grpcserver/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod go.sum ./ 4 | RUN go mod download 5 | COPY . . 6 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o grpc_server api/grpcserver/main/main.go 7 | 8 | FROM alpine 9 | RUN apk --no-cache add ca-certificates 10 | WORKDIR /app 11 | COPY nlp/frequency-analyzer/cities.json . 12 | COPY --from=builder /app/grpc_server . 13 | CMD ["./grpc_server"] 14 | -------------------------------------------------------------------------------- /aws_service/proto/renewingAddress.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package proto; 4 | 5 | service ElasticIpService { 6 | rpc renewElasticIp(RenewingElasticIp) returns (RenewedElasticResult) {} 7 | } 8 | 9 | message RenewedElasticResult { 10 | string elasticIp = 1; 11 | } 12 | 13 | message RenewingElasticIp { 14 | string instanceId = 1; 15 | string node = 2; 16 | string pod = 3; 17 | string pod_ip = 4; 18 | } 19 | -------------------------------------------------------------------------------- /frontend/src/components/BackButton.js: -------------------------------------------------------------------------------- 1 | import React, { Component, useState } from "react"; 2 | import { withRouter, history } from "react-router"; 3 | import backicon from "./img/chevron-left-solid.svg"; 4 | 5 | const BackButton = () => ( 6 |
7 | 8 | 9 | 10 |
11 | ); 12 | 13 | export default BackButton; 14 | -------------------------------------------------------------------------------- /frontend/src/components/PostsCard.jsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import PropTypes from "prop-types"; 3 | import Slideshow from "./Slideshow"; 4 | 5 | const PostsCard = props => { 6 | return ( 7 |
8 | 9 |
10 | ); 11 | }; 12 | 13 | PostsCard.propTypes = { 14 | slides: PropTypes.array 15 | }; 16 | 17 | export default PostsCard; 18 | -------------------------------------------------------------------------------- /frontend/src/css/endButton.css: -------------------------------------------------------------------------------- 1 | .endButton { 2 | height: 50px; 3 | width: 50px; 4 | background-color: #fafafa; 5 | border-radius: 25px; 6 | position: fixed; 7 | z-index: 2; 8 | top: 20px; 9 | right: 20px; 10 | } 11 | 12 | .endButton a { 13 | width: 50px; 14 | height: 50px; 15 | display: flex; 16 | justify-content: center; 17 | align-items: center; 18 | } 19 | 20 | .endButton a svg { 21 | color: black; 22 | } 23 | -------------------------------------------------------------------------------- /insta/db/start-postgres.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "# MIGRATE DATABASE" 4 | /migrate -database "postgres://postgres:12345678@postgres:5432/${POSTGRES_DB}?sslmode=disable" -path debezium/migrations up 5 | 6 | pwd 7 | 8 | ls -a 9 | 10 | echo "# PREPARE DEBEZIUM" 11 | curl -i -X POST -H "Accept:application/json" \ 12 | -H "Content-Type:application/json" \ 13 | http://connect:8083/connectors/ \ 14 | -d @db/register-postgres.json 15 | -------------------------------------------------------------------------------- /insta/db/register-postgres.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "inventory-connector", 3 | "config": { 4 | "connector.class": "io.debezium.connector.postgresql.PostgresConnector", 5 | "tasks.max": "1", 6 | "database.hostname": "postgres", 7 | "database.port": "5432", 8 | "database.user": "postgres", 9 | "database.password": "12345678", 10 | "database.dbname": "instascraper", 11 | "database.server.name": "postgres" 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /twitter/scraper/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-slim 2 | 3 | WORKDIR /src 4 | 5 | COPY requirements.txt . 6 | RUN pip install -r requirements.txt 7 | 8 | COPY ./twitterscraper ./twitterscraper 9 | 10 | ENTRYPOINT [ "python" ] 11 | CMD [ "-c", "raise Exception('Please set the CMD to either `-m twitterscraper.posts_scraper.py`, `-m twitterscraper.users_scraper.py`, `-m twitterscraper.follwers_scraper.py` or `-m twitterscraper.follwing_scraper.py`')" ] 12 | -------------------------------------------------------------------------------- /elastic/utils.go: -------------------------------------------------------------------------------- 1 | package elastic 2 | 3 | import ( 4 | "github.com/elastic/go-elasticsearch/v7" 5 | ) 6 | 7 | // InitializeElasticSearch returns an initialised elastic search client 8 | func InitializeElasticSearch(esHosts []string) *elasticsearch.Client { 9 | cfg := elasticsearch.Config{ 10 | Addresses: esHosts, 11 | } 12 | client, err := elasticsearch.NewClient(cfg) 13 | 14 | if err != nil { 15 | panic(err) 16 | } 17 | return client 18 | } 19 | -------------------------------------------------------------------------------- /twitter/debezium/register-postgres.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "inventory-connector", 3 | "config": { 4 | "connector.class": "io.debezium.connector.postgresql.PostgresConnector", 5 | "tasks.max": "1", 6 | "database.hostname": "postgres", 7 | "database.port": "5432", 8 | "database.user": "postgres", 9 | "database.password": "12345678", 10 | "database.dbname": "smag-twitter", 11 | "database.server.name": "postgres" 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /frontend/src/components/Button.jsx: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | 3 | function Button(props) { 4 | if (props.buttonlink) { 5 | return ( 6 |
7 | {props.children} 8 |
9 | ); 10 | } 11 | 12 | return ( 13 |
14 | {props.children} 15 |
16 | ); 17 | } 18 | 19 | export default Button; 20 | -------------------------------------------------------------------------------- /insta/indexer/faces/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod go.sum ./ 4 | RUN go mod download 5 | COPY . . 6 | RUN CGO_ENABLED=0 GOOS=linux go build \ 7 | -installsuffix cgo \ 8 | -o insta_faces_indexer \ 9 | insta/indexer/faces/insta_faces_indexer.go 10 | 11 | FROM alpine 12 | RUN apk --no-cache add ca-certificates 13 | WORKDIR /app 14 | COPY --from=builder /app/insta_faces_indexer . 15 | CMD ["./insta_faces_indexer"] 16 | -------------------------------------------------------------------------------- /insta/indexer/posts/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod go.sum ./ 4 | RUN go mod download 5 | COPY . . 6 | RUN CGO_ENABLED=0 GOOS=linux go build \ 7 | -installsuffix cgo \ 8 | -o insta_posts_indexer \ 9 | insta/indexer/posts/insta_posts_indexer.go 10 | 11 | FROM alpine 12 | RUN apk --no-cache add ca-certificates 13 | WORKDIR /app 14 | COPY --from=builder /app/insta_posts_indexer . 15 | CMD ["./insta_posts_indexer"] 16 | -------------------------------------------------------------------------------- /insta/indexer/users/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod go.sum ./ 4 | RUN go mod download 5 | COPY . . 6 | RUN CGO_ENABLED=0 GOOS=linux go build \ 7 | -installsuffix cgo \ 8 | -o insta_users_indexer \ 9 | insta/indexer/users/insta_users_indexer.go 10 | 11 | FROM alpine 12 | RUN apk --no-cache add ca-certificates 13 | WORKDIR /app 14 | COPY --from=builder /app/insta_users_indexer . 15 | CMD ["./insta_users_indexer"] 16 | -------------------------------------------------------------------------------- /aws_service/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod . 4 | COPY go.sum . 5 | RUN go mod download 6 | COPY . . 7 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o service.bin aws_service/main/main.go 8 | 9 | FROM alpine 10 | RUN apk --no-cache add ca-certificates 11 | RUN mkdir /app 12 | COPY http_header-generator/useragents.json /app 13 | COPY --from=builder /app/service.bin /app 14 | WORKDIR /app 15 | CMD ["./service.bin"] 16 | -------------------------------------------------------------------------------- /twitter/inserter/posts/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod go.sum ./ 4 | RUN go mod download 5 | COPY . . 6 | RUN CGO_ENABLED=0 GOOS=linux go build \ 7 | -installsuffix cgo \ 8 | -o twitter_inserter_posts \ 9 | twitter/inserter/posts/main/main.go 10 | 11 | FROM alpine 12 | RUN apk --no-cache add ca-certificates 13 | WORKDIR /app 14 | COPY --from=builder /app/twitter_inserter_posts . 15 | CMD ["./twitter_inserter_posts"] 16 | -------------------------------------------------------------------------------- /twitter/inserter/users/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod go.sum ./ 4 | RUN go mod download 5 | COPY . . 6 | RUN CGO_ENABLED=0 GOOS=linux go build \ 7 | -installsuffix cgo \ 8 | -o twitter_inserter_users \ 9 | twitter/inserter/users/main/main.go 10 | 11 | FROM alpine 12 | RUN apk --no-cache add ca-certificates 13 | WORKDIR /app 14 | COPY --from=builder /app/twitter_inserter_users . 15 | CMD ["./twitter_inserter_users"] 16 | -------------------------------------------------------------------------------- /twitter/scraper/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "twitter_scraper" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Urhengulas "] 6 | 7 | [tool.poetry.dependencies] 8 | python = "^3.7" 9 | twint = "^2.1.2" 10 | kafka-python = "^1.4.7" 11 | 12 | [tool.poetry.dev-dependencies] 13 | pylint = "^2.4.2" 14 | autopep8 = "^1.4.4" 15 | 16 | [build-system] 17 | requires = ["poetry>=0.12"] 18 | build-backend = "poetry.masonry.api" 19 | -------------------------------------------------------------------------------- /frontend/src/components/StatsCard.jsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import PropTypes from "prop-types"; 3 | 4 | const StatsCard = props => { 5 | return ( 6 |
7 |

We were able to reuse

8 |

{props.count}

9 |

snippets of your data.

10 |
11 | ); 12 | }; 13 | 14 | StatsCard.propTypes = { 15 | count: PropTypes.number 16 | }; 17 | 18 | export default StatsCard; 19 | -------------------------------------------------------------------------------- /twitter/filter/user_names/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod go.sum ./ 4 | RUN go mod download 5 | COPY . . 6 | RUN CGO_ENABLED=0 GOOS=linux go build \ 7 | -installsuffix cgo \ 8 | -o twitter_filter_user_names \ 9 | twitter/filter/user_names/main.go 10 | 11 | FROM alpine 12 | RUN apk --no-cache add ca-certificates 13 | WORKDIR /app 14 | COPY --from=builder /app/twitter_filter_user_names . 15 | CMD ["./twitter_filter_user_names"] 16 | -------------------------------------------------------------------------------- /insta/filter/user_names/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod . 4 | COPY go.sum . 5 | RUN go mod download 6 | COPY . . 7 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o kafka_changestream insta/filter/user_names/main.go 8 | 9 | FROM alpine 10 | RUN apk --no-cache add ca-certificates 11 | WORKDIR /app 12 | COPY http_header-generator/useragents.json . 13 | COPY --from=builder /app/kafka_changestream . 14 | CMD ["./kafka_changestream"] 15 | -------------------------------------------------------------------------------- /insta/indexer/comments/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod go.sum ./ 4 | RUN go mod download 5 | COPY . . 6 | RUN CGO_ENABLED=0 GOOS=linux go build \ 7 | -installsuffix cgo \ 8 | -o insta_comments_indexer \ 9 | insta/indexer/comments/insta_comments_indexer.go 10 | 11 | FROM alpine 12 | RUN apk --no-cache add ca-certificates 13 | WORKDIR /app 14 | COPY --from=builder /app/insta_comments_indexer . 15 | CMD ["./insta_comments_indexer"] 16 | -------------------------------------------------------------------------------- /faces/proto/recognizer.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package proto; 4 | 5 | message RecognizeRequest { 6 | string url = 1; 7 | } 8 | 9 | message Face { 10 | int32 x = 1; 11 | int32 y = 2; 12 | int32 width = 3; 13 | int32 height = 4; 14 | repeated float encoding = 5; 15 | } 16 | 17 | message RegognizeResponse { 18 | repeated Face faces = 1; 19 | } 20 | 21 | 22 | service FaceRecognizer { 23 | rpc RecognizeFaces(RecognizeRequest) returns (RegognizeResponse); 24 | } 25 | -------------------------------------------------------------------------------- /frontend/src/components/ProfileCard.jsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import PropTypes from "prop-types"; 3 | 4 | const ProfileCard = props => { 5 | return ( 6 |
7 | {props.alt} 12 |
13 | ); 14 | }; 15 | 16 | ProfileCard.propTypes = { 17 | pictureUrl: PropTypes.string 18 | }; 19 | 20 | export default ProfileCard; 21 | -------------------------------------------------------------------------------- /insta/inserter/postgres/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod . 4 | COPY go.sum . 5 | RUN go mod download 6 | COPY . . 7 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o instascraper_postgres_inserter insta/inserter/postgres/main/main.go 8 | 9 | FROM alpine 10 | RUN apk --no-cache add ca-certificates 11 | RUN mkdir /app 12 | COPY --from=builder /app/instascraper_postgres_inserter /app 13 | WORKDIR /app 14 | CMD ["./instascraper_postgres_inserter"] 15 | -------------------------------------------------------------------------------- /frontend/src/components/BioCard.jsx: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | import H2 from "./H2"; 3 | 4 | class BioCard extends Component { 5 | render() { 6 | return ( 7 |
8 | 9 | 10 | 13 | 14 |
11 |

{this.props.bio}

12 |
15 |
16 | ); 17 | } 18 | } 19 | 20 | export default BioCard; 21 | -------------------------------------------------------------------------------- /insta/scraper/likes/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod . 4 | COPY go.sum . 5 | RUN go mod download 6 | COPY . . 7 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o insta_likes_scraper insta/scraper/likes/main/main.go 8 | 9 | FROM alpine 10 | RUN apk --no-cache add ca-certificates 11 | RUN mkdir /app 12 | COPY http_header-generator/useragents.json /app 13 | COPY --from=builder /app/insta_likes_scraper /app 14 | WORKDIR /app 15 | CMD ["./insta_likes_scraper"] 16 | -------------------------------------------------------------------------------- /insta/scraper/posts/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod . 4 | COPY go.sum . 5 | RUN go mod download 6 | COPY . . 7 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o insta_posts_scraper insta/scraper/posts/main/main.go 8 | 9 | FROM alpine 10 | RUN apk --no-cache add ca-certificates 11 | RUN mkdir /app 12 | COPY http_header-generator/useragents.json /app 13 | COPY --from=builder /app/insta_posts_scraper /app 14 | WORKDIR /app 15 | CMD ["./insta_posts_scraper"] 16 | -------------------------------------------------------------------------------- /insta/scraper/user/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod . 4 | COPY go.sum . 5 | RUN go mod download 6 | COPY . . 7 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o instascraper_scraper insta/scraper/user/main/main.go 8 | 9 | FROM alpine 10 | RUN apk --no-cache add ca-certificates 11 | RUN mkdir /app 12 | COPY http_header-generator/useragents.json /app 13 | COPY --from=builder /app/instascraper_scraper /app 14 | WORKDIR /app 15 | CMD ["./instascraper_scraper"] 16 | -------------------------------------------------------------------------------- /scraper-client/scraper-client.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import "net/http" 4 | 5 | // ScraperClient is some implementation of a http client usable for scraping 6 | type ScraperClient interface { 7 | WithRetries(times int, f func() error) error 8 | Do(request *http.Request) (*http.Response, error) 9 | } 10 | 11 | // HTTPStatusError ... 12 | type HTTPStatusError struct { 13 | S string 14 | } 15 | 16 | // Error for the error interface 17 | func (e *HTTPStatusError) Error() string { 18 | return e.S 19 | } 20 | -------------------------------------------------------------------------------- /frontend/src/components/IGPost.js: -------------------------------------------------------------------------------- 1 | import React, { Component, useState } from "react"; 2 | import { withRouter, history } from "react-router"; 3 | 4 | function IGPost({ post }) { 5 | return ( 6 |
7 |
8 | 9 | 10 | 11 |
12 |
13 | ); 14 | } 15 | 16 | export default IGPost; 17 | -------------------------------------------------------------------------------- /frontend/src/components/img/chevron-left-solid.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /insta/db/kubernetes/kube-register-postgres.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "inventory-connector", 3 | "config": { 4 | "plugin.name": "wal2json", 5 | "connector.class": "io.debezium.connector.postgresql.PostgresConnector", 6 | "tasks.max": "1", 7 | "database.hostname": "my-postgres-postgresql", 8 | "database.port": "5432", 9 | "database.user": "postgres", 10 | "database.password": "12345678", 11 | "database.dbname": "instascraper", 12 | "database.server.name": "postgres" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /insta/inserter/likes/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod . 4 | COPY go.sum . 5 | RUN go mod download 6 | COPY . . 7 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o insta_likes_inserter insta/inserter/likes/main/main.go 8 | 9 | FROM alpine 10 | RUN apk --no-cache add ca-certificates 11 | RUN mkdir /app 12 | COPY http_header-generator/useragents.json /app 13 | COPY --from=builder /app/insta_likes_inserter /app 14 | WORKDIR /app 15 | CMD ["./insta_likes_inserter"] 16 | -------------------------------------------------------------------------------- /insta/inserter/posts/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod . 4 | COPY go.sum . 5 | RUN go mod download 6 | COPY . . 7 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o insta_posts_inserter insta/inserter/posts/main/main.go 8 | 9 | FROM alpine 10 | RUN apk --no-cache add ca-certificates 11 | RUN mkdir /app 12 | COPY http_header-generator/useragents.json /app 13 | COPY --from=builder /app/insta_posts_inserter /app 14 | WORKDIR /app 15 | CMD ["./insta_posts_inserter"] 16 | -------------------------------------------------------------------------------- /neo4j/create-import-user-json/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod go.sum ./ 4 | RUN go mod download 5 | COPY . . 6 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o create_import_json neo4j/create-import-user-json/main/main.go 7 | 8 | FROM alpine 9 | RUN apk --no-cache add ca-certificates 10 | RUN mkdir /app 11 | COPY http_header-generator/useragents.json /app 12 | COPY --from=builder /app/create_import_json /app 13 | WORKDIR /app 14 | CMD ["./create_import_json"] 15 | -------------------------------------------------------------------------------- /insta/scraper/comments/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod . 4 | COPY go.sum . 5 | RUN go mod download 6 | COPY . . 7 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o insta_comments_scraper insta/scraper/comments/main/main.go 8 | 9 | FROM alpine 10 | RUN apk --no-cache add ca-certificates 11 | RUN mkdir /app 12 | COPY http_header-generator/useragents.json /app 13 | COPY --from=builder /app/insta_comments_scraper /app 14 | WORKDIR /app 15 | CMD ["./insta_comments_scraper"] 16 | -------------------------------------------------------------------------------- /elastic/models/insta.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | type InstaUser struct { 4 | ID int `json:"id"` 5 | Username string `json:"user_name"` 6 | Realname string `json:"real_name"` 7 | Bio string `json:"bio"` 8 | } 9 | 10 | type InstaPost struct { 11 | ID int `json:"id"` 12 | UserID int `json:"user_id"` 13 | Caption string `json:"caption"` 14 | } 15 | 16 | type InstaComment struct { 17 | ID int `json:"id"` 18 | PostID int `json:"post_id"` 19 | Comment string `json:"comment_text"` 20 | } 21 | -------------------------------------------------------------------------------- /insta/inserter/comments/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13 as builder 2 | WORKDIR /app 3 | COPY go.mod . 4 | COPY go.sum . 5 | RUN go mod download 6 | COPY . . 7 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o insta_comments_inserter insta/inserter/comments/main/main.go 8 | 9 | FROM alpine 10 | RUN apk --no-cache add ca-certificates 11 | RUN mkdir /app 12 | COPY http_header-generator/useragents.json /app 13 | COPY --from=builder /app/insta_comments_inserter /app 14 | WORKDIR /app 15 | CMD ["./insta_comments_inserter"] 16 | -------------------------------------------------------------------------------- /frontend/src/components/LocationCard.jsx: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | import H2 from "./H2"; 3 | 4 | class LocationCard extends Component { 5 | render() { 6 | return ( 7 |
8 | 9 | 10 | 13 | 14 |
11 |

{this.props.bio}

12 |
15 |
16 | ); 17 | } 18 | } 19 | 20 | export default LocationCard; 21 | -------------------------------------------------------------------------------- /elastic/build/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu as plugin-builder 2 | RUN apt update && apt install -y git maven openjdk-8-jdk 3 | WORKDIR /src 4 | RUN git clone -b es-7.1 --single-branch https://github.com/lior-k/fast-elasticsearch-vector-scoring.git 5 | WORKDIR /src/fast-elasticsearch-vector-scoring 6 | RUN mvn package 7 | 8 | FROM elasticsearch:7.1.0 9 | COPY --from=plugin-builder /src/fast-elasticsearch-vector-scoring/target/releases/elasticsearch-binary-vector-scoring-7.1.0.zip /plugins/ 10 | RUN bin/elasticsearch-plugin install file:///plugins/elasticsearch-binary-vector-scoring-7.1.0.zip 11 | -------------------------------------------------------------------------------- /frontend/src/components/EndButton.jsx: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; 3 | import { faTimes } from "@fortawesome/free-solid-svg-icons"; 4 | import { Link } from "react-router-dom"; 5 | import "./../css/endButton.css"; 6 | 7 | class EndButton extends Component { 8 | render() { 9 | return ( 10 |
11 | 12 | 13 | 14 |
15 | ); 16 | } 17 | } 18 | 19 | export default EndButton; 20 | -------------------------------------------------------------------------------- /insta/scraper/user/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | scraper "github.com/codeuniversity/smag-mvp/insta/scraper/user" 5 | "github.com/codeuniversity/smag-mvp/kafka" 6 | "github.com/codeuniversity/smag-mvp/service" 7 | ) 8 | 9 | func main() { 10 | nameReaderConfig, infoWriterConfig, errWriterConfig := kafka.GetScraperConfig() 11 | 12 | s := scraper.New( 13 | kafka.NewReader(nameReaderConfig), 14 | kafka.NewWriter(infoWriterConfig), 15 | kafka.NewWriter(errWriterConfig), 16 | ) 17 | service.CloseOnSignal(s) 18 | waitUntilDone := s.Start() 19 | 20 | waitUntilDone() 21 | } 22 | -------------------------------------------------------------------------------- /config/postgres-config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import "github.com/codeuniversity/smag-mvp/utils" 4 | 5 | // PostgresConfig holds all the configurable variables for Postgres 6 | type PostgresConfig struct { 7 | PostgresHost string 8 | PostgresPassword string 9 | } 10 | 11 | //GetPostgresConfig returns a inizialized Postgres Config 12 | func GetPostgresConfig() *PostgresConfig { 13 | return &PostgresConfig{ 14 | PostgresHost: utils.GetStringFromEnvWithDefault("POSTGRES_HOST", "127.0.0.1"), 15 | PostgresPassword: utils.GetStringFromEnvWithDefault("POSTGRES_PASSWORD", ""), 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /utils/neo4j-utils.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | // Neo4jConfig holds all necessary informations to connect to a neo4j database 4 | type Neo4jConfig struct { 5 | Host string 6 | Username string 7 | Password string 8 | } 9 | 10 | //getNeo4jConfig returns a initialized Neo4jConfig object by reading the values from env variables 11 | func GetNeo4jConfig() *Neo4jConfig { 12 | return &Neo4jConfig{ 13 | Host: GetStringFromEnvWithDefault("NEO4J_HOST", "localhost"), 14 | Username: GetStringFromEnvWithDefault("NEO4J_USERNAME", "neo4j"), 15 | Password: GetStringFromEnvWithDefault("NEO4J_PASSWORD", ""), 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /frontend/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "React App", 3 | "name": "Create React App Sample", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "64x64 32x32 24x24 16x16", 8 | "type": "image/x-icon" 9 | }, 10 | { 11 | "src": "logo192.png", 12 | "type": "image/png", 13 | "sizes": "192x192" 14 | }, 15 | { 16 | "src": "logo512.png", 17 | "type": "image/png", 18 | "sizes": "512x512" 19 | } 20 | ], 21 | "start_url": ".", 22 | "display": "standalone", 23 | "theme_color": "#000000", 24 | "background_color": "#ffffff" 25 | } 26 | -------------------------------------------------------------------------------- /twitter/scraper/requirements.txt: -------------------------------------------------------------------------------- 1 | aiodns==2.0.0 2 | aiohttp==3.6.2 3 | aiohttp-socks==0.2.2 4 | async-timeout==3.0.1 5 | attrs==19.3.0 6 | beautifulsoup4==4.8.1 7 | cchardet==2.1.4 8 | cffi==1.13.0 9 | chardet==3.0.4 10 | elasticsearch==7.0.5 11 | fake-useragent==0.1.11 12 | geographiclib==1.50 13 | geopy==1.20.0 14 | idna==2.8 15 | kafka-python==1.4.7 16 | multidict==4.5.2 17 | numpy==1.17.2 18 | pandas==0.25.1 19 | pycares==3.0.0 20 | pycparser==2.19 21 | pysocks==1.7.1 22 | python-dateutil==2.8.0 23 | pytz==2019.3 24 | schedule==0.6.0 25 | six==1.12.0 26 | soupsieve==1.9.4 27 | twint==2.1.2 28 | urllib3==1.25.6 29 | yarl==1.3.0 30 | -------------------------------------------------------------------------------- /frontend/src/components/InterestCard.jsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import PropTypes from "prop-types"; 3 | import Slideshow from "./Slideshow"; 4 | import InterestFooter from "./InterestFooter"; 5 | 6 | const InterestCard = props => { 7 | return ( 8 |
9 |
10 | 11 | 12 |
13 |
14 | ); 15 | }; 16 | 17 | InterestCard.propTypes = { 18 | slides: PropTypes.array, 19 | title: PropTypes.string, 20 | details: PropTypes.string 21 | }; 22 | 23 | export default InterestCard; 24 | -------------------------------------------------------------------------------- /tools/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu 2 | 3 | RUN apt update && apt install curl wget file xz-utils postgresql-client zsh git kafkacat jq -y 4 | 5 | # install github.com/fgeller/kt 6 | RUN wget https://github.com/fgeller/kt/releases/download/v12.1.0/kt-v12.1.0-linux-amd64.txz && \ 7 | cat kt-v12.1.0-linux-amd64.txz | unxz > kt-v12.1.0-linux-amd64 && \ 8 | tar -xvf kt-v12.1.0-linux-amd64 && \ 9 | mv kt /usr/local/bin && \ 10 | rm kt-v12.1.0-linux-amd64.txz && \ 11 | rm kt-v12.1.0-linux-amd64 12 | 13 | RUN zsh -c "$(curl -fsSL https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh)" 14 | COPY .zshrc /root 15 | 16 | WORKDIR /home/tools 17 | 18 | ENTRYPOINT [ "zsh"] 19 | -------------------------------------------------------------------------------- /insta/db/kubernetes/kube-start-postgres.sh: -------------------------------------------------------------------------------- 1 | migrate -database "postgres://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:5432/instascraper?sslmode=disable" -path db/migrations up 2 | 3 | jq --arg name "$POSTGRES_USER" --arg password "$POSTGRES_PASSWORD" --arg host "$POSTGRES_HOST" --arg dbname "$POSTGRES_DATABASE" '.config."database.user"=$name | .config."database.password"=$password | .config."database.hostname"=$host | .config."database.dbname"=$dbname' kube-register-postgres.json > kube-register-postgres-secret.json 4 | 5 | curl -i -X POST -H "Accept:application/json" \ 6 | -H "Content-Type:application/json" \ 7 | http://deb-connect-service:8083/connectors/ \ 8 | -d @kube-register-postgres-secret.json 9 | -------------------------------------------------------------------------------- /insta/inserter/likes/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | inserter "github.com/codeuniversity/smag-mvp/insta/inserter/likes" 5 | "github.com/codeuniversity/smag-mvp/kafka" 6 | "github.com/codeuniversity/smag-mvp/service" 7 | "github.com/codeuniversity/smag-mvp/utils" 8 | ) 9 | 10 | func main() { 11 | postgresHost := utils.GetStringFromEnvWithDefault("POSTGRES_HOST", "127.0.0.1") 12 | postgresPassword := utils.GetStringFromEnvWithDefault("POSTGRES_PASSWORD", "") 13 | 14 | qReaderConfig := kafka.GetInserterConfig() 15 | 16 | s := inserter.New(postgresHost, postgresPassword, kafka.NewReader(qReaderConfig)) 17 | 18 | service.CloseOnSignal(s) 19 | waitUntilClosed := s.Start() 20 | 21 | waitUntilClosed() 22 | } 23 | -------------------------------------------------------------------------------- /tools/.zshrc: -------------------------------------------------------------------------------- 1 | 2 | # If you come from bash you might have to change your $PATH. 3 | # export PATH=$HOME/bin:/usr/local/bin:$PATH 4 | 5 | # Path to your oh-my-zsh installation. 6 | export ZSH="/root/.oh-my-zsh" 7 | 8 | # Set name of the theme to load --- if set to "random", it will 9 | # load a random theme each time oh-my-zsh is loaded, in which case, 10 | # to know which specific one was loaded, run: echo $RANDOM_THEME 11 | # See https://github.com/robbyrussell/oh-my-zsh/wiki/Themes 12 | ZSH_THEME="miloshadzic" 13 | 14 | 15 | DISABLE_AUTO_UPDATE="true" 16 | 17 | plugins=(git) 18 | 19 | source $ZSH/oh-my-zsh.sh 20 | 21 | export RPROMPT="%{$fg_bold[blue]%} codesmag/tools %{$fg_bold[blue]%} [%D{%y/%m/%f}|%@] %{$reset_color%}%%" 22 | -------------------------------------------------------------------------------- /insta/inserter/comments/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | inserter "github.com/codeuniversity/smag-mvp/insta/inserter/comments" 5 | "github.com/codeuniversity/smag-mvp/kafka" 6 | "github.com/codeuniversity/smag-mvp/service" 7 | "github.com/codeuniversity/smag-mvp/utils" 8 | ) 9 | 10 | func main() { 11 | postgresHost := utils.GetStringFromEnvWithDefault("POSTGRES_HOST", "127.0.0.1") 12 | postgresPassword := utils.GetStringFromEnvWithDefault("POSTGRES_PASSWORD", "") 13 | 14 | qReaderConfig := kafka.GetInserterConfig() 15 | 16 | s := inserter.New(postgresHost, postgresPassword, kafka.NewReader(qReaderConfig)) 17 | 18 | service.CloseOnSignal(s) 19 | waitUntilClosed := s.Start() 20 | 21 | waitUntilClosed() 22 | } 23 | -------------------------------------------------------------------------------- /scraper-client/scraper-config.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import "github.com/codeuniversity/smag-mvp/utils" 4 | 5 | type ScraperConfig struct { 6 | ElasticAssignmentTimeout int 7 | RequestTimeout int 8 | RequestRetryCount int 9 | ElasticIpRetryCount int 10 | } 11 | 12 | func GetScraperConfig() *ScraperConfig { 13 | return &ScraperConfig{ 14 | ElasticAssignmentTimeout: utils.GetNumberFromEnvWithDefault("ELASTIC_ASSIGNMENT_TIMEOUT", 10000), 15 | RequestTimeout: utils.GetNumberFromEnvWithDefault("REQUEST_TIMEOUT", 1000), 16 | RequestRetryCount: utils.GetNumberFromEnvWithDefault("REQUEST_RETRY_COUNT", 3), 17 | ElasticIpRetryCount: utils.GetNumberFromEnvWithDefault("ELASTIC_IP_RETRY_COUNT", 2), 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /neo4j/create-import-user-json/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | neo4j_import "github.com/codeuniversity/smag-mvp/neo4j/create-import-user-json" 5 | "github.com/codeuniversity/smag-mvp/service" 6 | "github.com/codeuniversity/smag-mvp/utils" 7 | ) 8 | 9 | func main() { 10 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "127.0.0.1:9092") 11 | 12 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID") 13 | rTopic := utils.MustGetStringFromEnv("KAFKA_CHANGE_TOPIC") 14 | kafkaChunk := utils.GetNumberFromEnvWithDefault("KAFKA_MESSAGE_CHUNK", 10) 15 | 16 | i := neo4j_import.New(kafkaAddress, rTopic, groupID, kafkaChunk) 17 | 18 | service.CloseOnSignal(i) 19 | waitUntilClosed := i.Start() 20 | 21 | waitUntilClosed() 22 | } 23 | -------------------------------------------------------------------------------- /insta/inserter/postgres/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | inserter "github.com/codeuniversity/smag-mvp/insta/inserter/postgres" 5 | "github.com/codeuniversity/smag-mvp/kafka" 6 | "github.com/codeuniversity/smag-mvp/service" 7 | "github.com/codeuniversity/smag-mvp/utils" 8 | ) 9 | 10 | func main() { 11 | var i *inserter.Inserter 12 | 13 | postgresHost := utils.GetStringFromEnvWithDefault("POSTGRES_HOST", "127.0.0.1") 14 | postgresPassword := utils.GetStringFromEnvWithDefault("POSTGRES_PASSWORD", "") 15 | 16 | qReaderConfig := kafka.GetInserterConfig() 17 | 18 | i = inserter.New( 19 | postgresHost, 20 | postgresPassword, 21 | kafka.NewReader(qReaderConfig), 22 | ) 23 | 24 | service.CloseOnSignal(i) 25 | waitUntilClosed := i.Start() 26 | 27 | waitUntilClosed() 28 | } 29 | -------------------------------------------------------------------------------- /frontend/src/pages/Greeting.jsx: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | import Button from "../components/Button"; 3 | import "./../index.css"; 4 | import H1 from "../components/H1"; 5 | import H2 from "../components/H2"; 6 | 7 | class Greeting extends Component { 8 | render() { 9 | return ( 10 |
11 |
12 |
13 |

Welcome to SocialRecord

14 |

Sit back and enjoy the experience.

15 |
16 | 17 |
18 |
19 |
20 |
21 | ); 22 | } 23 | } 24 | 25 | export default Greeting; 26 | -------------------------------------------------------------------------------- /frontend/src/pages/GroupIntent.jsx: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | import Button from "../components/Button"; 3 | import "./../index.css"; 4 | import H1 from "../components/H1"; 5 | import H2 from "../components/H2"; 6 | 7 | function GroupIntent({ nextPage }) { 8 | return ( 9 |
10 |
11 |
12 |

Anyone can do this

13 |

We had limited time and money.

14 |

Imagine what others could do with this power.

15 |
16 | 17 |
18 |
19 |
20 |
21 | ); 22 | } 23 | 24 | export default GroupIntent; 25 | -------------------------------------------------------------------------------- /frontend/src/components/Slideshow.jsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import PropTypes from "prop-types"; 3 | import { Zoom } from "react-slideshow-image"; 4 | 5 | const properties = { 6 | duration: 5000, 7 | transitionDuration: 300, 8 | indicators: false, 9 | scale: 1.4, 10 | arrows: false 11 | }; 12 | 13 | const Slideshow = props => { 14 | return ( 15 |
16 | 17 | {props.slides.map((imageUrl, index) => ( 18 | slide 24 | ))} 25 | 26 |
27 | ); 28 | }; 29 | 30 | Slideshow.propTypes = { 31 | slides: PropTypes.array 32 | }; 33 | 34 | export default Slideshow; 35 | -------------------------------------------------------------------------------- /insta/scraper/posts/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | scraper "github.com/codeuniversity/smag-mvp/insta/scraper/posts" 5 | "github.com/codeuniversity/smag-mvp/kafka" 6 | client "github.com/codeuniversity/smag-mvp/scraper-client" 7 | "github.com/codeuniversity/smag-mvp/service" 8 | "github.com/codeuniversity/smag-mvp/utils" 9 | ) 10 | 11 | func main() { 12 | awsServiceAddress := utils.GetStringFromEnvWithDefault("AWS_SERVICE_ADDRESS", "") 13 | nameReaderConfig, infoWriterConfig, errWriterConfig := kafka.GetInstaPostsScraperConfig() 14 | 15 | config := client.GetScraperConfig() 16 | s := scraper.New(config, awsServiceAddress, kafka.NewReader(nameReaderConfig), kafka.NewWriter(infoWriterConfig), kafka.NewWriter(errWriterConfig)) 17 | 18 | service.CloseOnSignal(s) 19 | waitUntilClosed := s.Start() 20 | 21 | waitUntilClosed() 22 | } 23 | -------------------------------------------------------------------------------- /frontend/src/components/Form.js: -------------------------------------------------------------------------------- 1 | import React, { Component, useState } from "react"; 2 | import { withRouter, history } from "react-router"; 3 | 4 | // import {onSubmit} from './App'; 5 | 6 | // eslint-disable-next-line 7 | function Form(props) { 8 | const [value, setValue] = useState(""); 9 | 10 | return ( 11 |
12 |
{ 14 | e.preventDefault(); 15 | props.onSubmit(value); 16 | }} 17 | > 18 | { 24 | e.preventDefault(); 25 | setValue(e.target.value); 26 | }} 27 | /> 28 | 29 |
30 |
31 | ); 32 | } 33 | 34 | export default Form; 35 | -------------------------------------------------------------------------------- /twitter/inserter/posts/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/codeuniversity/smag-mvp/kafka" 5 | "github.com/codeuniversity/smag-mvp/service" 6 | inserter "github.com/codeuniversity/smag-mvp/twitter/inserter/posts" 7 | "github.com/codeuniversity/smag-mvp/utils" 8 | ) 9 | 10 | func main() { 11 | var i *inserter.Inserter 12 | 13 | postgresHost := utils.GetStringFromEnvWithDefault("POSTGRES_HOST", "127.0.0.1") 14 | postgresPassword := utils.GetStringFromEnvWithDefault("POSTGRES_PASSWORD", "") 15 | postgresDBNAme := utils.GetStringFromEnvWithDefault("POSTGRES_DB_NAME", "twitter_scraper") 16 | 17 | qReaderConfig := kafka.GetInserterConfig() 18 | 19 | i = inserter.New( 20 | postgresHost, 21 | postgresPassword, 22 | postgresDBNAme, 23 | kafka.NewReader(qReaderConfig), 24 | ) 25 | 26 | service.CloseOnSignal(i) 27 | waitUntilDone := i.Start() 28 | waitUntilDone() 29 | } 30 | -------------------------------------------------------------------------------- /twitter/inserter/users/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/codeuniversity/smag-mvp/kafka" 5 | "github.com/codeuniversity/smag-mvp/service" 6 | inserter "github.com/codeuniversity/smag-mvp/twitter/inserter/users" 7 | "github.com/codeuniversity/smag-mvp/utils" 8 | ) 9 | 10 | func main() { 11 | var i *inserter.Inserter 12 | 13 | postgresHost := utils.GetStringFromEnvWithDefault("POSTGRES_HOST", "127.0.0.1") 14 | postgresPassword := utils.GetStringFromEnvWithDefault("POSTGRES_PASSWORD", "") 15 | postgresDBNAme := utils.GetStringFromEnvWithDefault("POSTGRES_DB_NAME", "twitter_scraper") 16 | 17 | qReaderConfig := kafka.GetInserterConfig() 18 | 19 | i = inserter.New( 20 | postgresHost, 21 | postgresPassword, 22 | postgresDBNAme, 23 | kafka.NewReader(qReaderConfig), 24 | ) 25 | 26 | service.CloseOnSignal(i) 27 | waitUntilDone := i.Start() 28 | waitUntilDone() 29 | } 30 | -------------------------------------------------------------------------------- /frontend/src/components/Popup.jsx: -------------------------------------------------------------------------------- 1 | import React, { Component, useState } from "react"; 2 | import { withRouter, history } from "react-router"; 3 | import Button from "../components/Button"; 4 | import H1 from "../components/H1"; 5 | 6 | function Popup() { 7 | return ( 8 |
9 |
10 |

Is this your profile?

11 | 12 |
username
13 |
14 |
15 | 16 |
17 |
18 | 19 |
20 |
21 |
22 |
23 | ); 24 | } 25 | 26 | export default Popup; 27 | -------------------------------------------------------------------------------- /insta/scraper/likes/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | scraper "github.com/codeuniversity/smag-mvp/insta/scraper/likes" 5 | "github.com/codeuniversity/smag-mvp/kafka" 6 | client "github.com/codeuniversity/smag-mvp/scraper-client" 7 | "github.com/codeuniversity/smag-mvp/service" 8 | "github.com/codeuniversity/smag-mvp/utils" 9 | ) 10 | 11 | func main() { 12 | awsServiceAddress := utils.GetStringFromEnvWithDefault("AWS_SERVICE_ADDRESS", "") 13 | commentLimit := utils.GetNumberFromEnvWithDefault("LIKE_LIMIT", 24) 14 | readerConfig, infoWriterConfig, errWriterConfig := kafka.GetScraperConfig() 15 | 16 | config := client.GetScraperConfig() 17 | s := scraper.New(config, awsServiceAddress, kafka.NewReader(readerConfig), kafka.NewWriter(infoWriterConfig), kafka.NewWriter(errWriterConfig), commentLimit) 18 | 19 | service.CloseOnSignal(s) 20 | waitUntilClosed := s.Start() 21 | 22 | waitUntilClosed() 23 | } 24 | -------------------------------------------------------------------------------- /frontend/public/favicon/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "App", 3 | "icons": [ 4 | { 5 | "src": "\/android-icon-36x36.png", 6 | "sizes": "36x36", 7 | "type": "image\/png", 8 | "density": "0.75" 9 | }, 10 | { 11 | "src": "\/android-icon-48x48.png", 12 | "sizes": "48x48", 13 | "type": "image\/png", 14 | "density": "1.0" 15 | }, 16 | { 17 | "src": "\/android-icon-72x72.png", 18 | "sizes": "72x72", 19 | "type": "image\/png", 20 | "density": "1.5" 21 | }, 22 | { 23 | "src": "\/android-icon-96x96.png", 24 | "sizes": "96x96", 25 | "type": "image\/png", 26 | "density": "2.0" 27 | }, 28 | { 29 | "src": "\/android-icon-144x144.png", 30 | "sizes": "144x144", 31 | "type": "image\/png", 32 | "density": "3.0" 33 | }, 34 | { 35 | "src": "\/android-icon-192x192.png", 36 | "sizes": "192x192", 37 | "type": "image\/png", 38 | "density": "4.0" 39 | } 40 | ] 41 | } -------------------------------------------------------------------------------- /insta/scraper/comments/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | scraper "github.com/codeuniversity/smag-mvp/insta/scraper/comments" 5 | "github.com/codeuniversity/smag-mvp/kafka" 6 | client "github.com/codeuniversity/smag-mvp/scraper-client" 7 | "github.com/codeuniversity/smag-mvp/service" 8 | "github.com/codeuniversity/smag-mvp/utils" 9 | ) 10 | 11 | func main() { 12 | awsServiceAddress := utils.GetStringFromEnvWithDefault("AWS_SERVICE_ADDRESS", "") 13 | commentLimit := utils.GetNumberFromEnvWithDefault("COMMENT_LIMIT", 24) 14 | readerConfig, infoWriterConfig, errWriterConfig := kafka.GetScraperConfig() 15 | 16 | config := client.GetScraperConfig() 17 | s := scraper.New(config, awsServiceAddress, kafka.NewReader(readerConfig), kafka.NewWriter(infoWriterConfig), kafka.NewWriter(errWriterConfig), commentLimit) 18 | 19 | service.CloseOnSignal(s) 20 | waitUntilClosed := s.Start() 21 | 22 | waitUntilClosed() 23 | } 24 | -------------------------------------------------------------------------------- /insta/pics-downloader/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/codeuniversity/smag-mvp/config" 5 | downloader "github.com/codeuniversity/smag-mvp/insta/pics-downloader" 6 | "github.com/codeuniversity/smag-mvp/kafka" 7 | "github.com/codeuniversity/smag-mvp/service" 8 | "github.com/codeuniversity/smag-mvp/utils" 9 | ) 10 | 11 | func main() { 12 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "127.0.0.1:9092") 13 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID") 14 | jobsTopic := utils.MustGetStringFromEnv("KAFKA_PICTURE_DOWNLOADS_TOPIC") 15 | qReader := kafka.NewReader(kafka.NewReaderConfig(kafkaAddress, groupID, jobsTopic)) 16 | 17 | s3Config := config.GetS3Config() 18 | postgresConfig := config.GetPostgresConfig() 19 | 20 | i := downloader.New(qReader, s3Config, postgresConfig) 21 | 22 | service.CloseOnSignal(i) 23 | waitUntilClosed := i.Start() 24 | 25 | waitUntilClosed() 26 | } 27 | -------------------------------------------------------------------------------- /insta/inserter/posts/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | inserter "github.com/codeuniversity/smag-mvp/insta/inserter/posts" 5 | "github.com/codeuniversity/smag-mvp/kafka" 6 | "github.com/codeuniversity/smag-mvp/service" 7 | "github.com/codeuniversity/smag-mvp/utils" 8 | ) 9 | 10 | func main() { 11 | postgresHost := utils.GetStringFromEnvWithDefault("POSTGRES_HOST", "127.0.0.1") 12 | postgresPassword := utils.GetStringFromEnvWithDefault("POSTGRES_PASSWORD", "") 13 | 14 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "127.0.0.1:9092") 15 | 16 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID") 17 | rTopic := utils.MustGetStringFromEnv("KAFKA_INSTA_POSTS_TOPIC") 18 | qReaderConfig := kafka.NewReaderConfig(kafkaAddress, groupID, rTopic) 19 | 20 | i := inserter.New(postgresHost, postgresPassword, kafka.NewReader(qReaderConfig)) 21 | 22 | service.CloseOnSignal(i) 23 | waitUntilClosed := i.Start() 24 | 25 | waitUntilClosed() 26 | } 27 | -------------------------------------------------------------------------------- /config/s3-config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import "github.com/codeuniversity/smag-mvp/utils" 4 | 5 | // S3Config holds all the configurable variables for S3 6 | type S3Config struct { 7 | S3BucketName string 8 | S3Region string 9 | S3Endpoint string 10 | S3AccessKeyID string 11 | S3SecretAccessKey string 12 | S3UseSSL bool 13 | } 14 | 15 | //GetS3Config returns a inizialized S3 Config 16 | func GetS3Config() *S3Config { 17 | return &S3Config{ 18 | S3BucketName: utils.GetStringFromEnvWithDefault("S3_BUCKET_NAME", "insta_pics"), 19 | S3Region: utils.GetStringFromEnvWithDefault("S3_REGION", "eu-west-1"), 20 | S3Endpoint: utils.GetStringFromEnvWithDefault("S3_ENDOINT", "127.0.0.1:9000"), 21 | S3AccessKeyID: utils.MustGetStringFromEnv("S3_ACCESS_KEY_ID"), 22 | S3SecretAccessKey: utils.MustGetStringFromEnv("S3_SECRET_ACCESS_KEY"), 23 | S3UseSSL: utils.GetBoolFromEnvWithDefault("S3_USE_SSL", true), 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /db/utils.go: -------------------------------------------------------------------------------- 1 | package db 2 | 3 | import ( 4 | "github.com/jinzhu/gorm" 5 | // necessary for gorm :pointup: 6 | _ "github.com/jinzhu/gorm/dialects/postgres" 7 | ) 8 | 9 | // CreateOrUpdate checks whether a specific (gorm) database entry already exists using a model filter, 10 | // creates it in case no record is found and updates the first in case of existing record(s) 11 | func CreateOrUpdate(db *gorm.DB, out interface{}, where interface{}, update interface{}) error { 12 | var err error 13 | 14 | tx := db.Begin() 15 | if tx.Where(where).First(out).RecordNotFound() { 16 | err = tx.Create(update).Scan(out).Error 17 | } else { 18 | err = tx.Model(out).Update(update).Scan(out).Error 19 | } 20 | if err != nil { 21 | tx.Rollback() 22 | return err 23 | } 24 | tx.Commit() 25 | 26 | return nil 27 | } 28 | 29 | //Create just create a new entry in the database 30 | func Create(db *gorm.DB, out interface{}, update interface{}) error { 31 | return db.Create(update).Scan(out).Error 32 | } 33 | -------------------------------------------------------------------------------- /elastic/search/facetest/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/codeuniversity/smag-mvp/elastic" 8 | "github.com/codeuniversity/smag-mvp/elastic/search/faces" 9 | "github.com/codeuniversity/smag-mvp/faces/proto" 10 | "google.golang.org/grpc" 11 | ) 12 | 13 | func main() { 14 | 15 | if len(os.Args) != 2 { 16 | panic("requires exactly one param - the url to an image") 17 | } 18 | 19 | con, err := grpc.Dial("localhost:6666", grpc.WithInsecure()) 20 | if err != nil { 21 | panic(err) 22 | } 23 | faceReconClient := proto.NewFaceRecognizerClient(con) 24 | esClient := elastic.InitializeElasticSearch([]string{"http://localhost:9200"}) 25 | 26 | client := &faces.Client{ 27 | FaceRecognitionClient: faceReconClient, 28 | ESClient: esClient, 29 | } 30 | 31 | faces, err := client.FindSimilarFacesInImage(os.Args[1], 10) 32 | 33 | fmt.Printf("\n (") 34 | for _, face := range faces { 35 | fmt.Printf("%d,", face.PostID) 36 | } 37 | fmt.Printf(")\n") 38 | } 39 | -------------------------------------------------------------------------------- /twitter/scraper/twitterscraper/utils.py: -------------------------------------------------------------------------------- 1 | import twint 2 | 3 | 4 | def get_conf(user_name: str) -> twint.config.Config: 5 | c = twint.Config() 6 | c.Username = user_name 7 | c.Store_object = True 8 | c.Hide_output = True 9 | return c 10 | 11 | 12 | class ShallowTwitterUser(object): 13 | 14 | # full user object contains 15 | 16 | # id: str = "" 17 | # url: str = "" 18 | # type: str = "" 19 | # name: str = "" 20 | # username: str = "" 21 | # bio: str = "" 22 | # avatar: str = "" 23 | # background_image: str = "" 24 | # location: str = "" 25 | # join_date: str = "" 26 | # join_time: str = "" 27 | # is_private: int = 0 28 | # is_verified: int = 0 29 | # following: int = 0 30 | # following_list: List[str] = [""] 31 | # followers: int = 0 32 | # followers_list: List[str] = [""] 33 | # tweets: int = 0 34 | # likes: int = 0 35 | # media_count: int = 0 36 | 37 | def __init__(self, username): 38 | self.username = username 39 | -------------------------------------------------------------------------------- /insta/inserter/posts_face/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | inserter "github.com/codeuniversity/smag-mvp/insta/inserter/posts_face" 5 | "github.com/codeuniversity/smag-mvp/kafka" 6 | "github.com/codeuniversity/smag-mvp/service" 7 | "github.com/codeuniversity/smag-mvp/utils" 8 | ) 9 | 10 | func main() { 11 | var i *inserter.Inserter 12 | 13 | postgresHost := utils.GetStringFromEnvWithDefault("POSTGRES_HOST", "127.0.0.1") 14 | postgresPassword := utils.GetStringFromEnvWithDefault("POSTGRES_PASSWORD", "") 15 | 16 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "127.0.0.1:9092") 17 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID") 18 | jobsReadTopic := utils.GetStringFromEnvWithDefault("KAFKA_FACE_DETECTION_RESULTS_TOPIC", "insta_posts_detected_faces") 19 | qReader := kafka.NewReader(kafka.NewReaderConfig(kafkaAddress, groupID, jobsReadTopic)) 20 | 21 | i = inserter.New( 22 | postgresHost, 23 | postgresPassword, 24 | qReader, 25 | ) 26 | 27 | service.CloseOnSignal(i) 28 | waitUntilClosed := i.Start() 29 | 30 | waitUntilClosed() 31 | } 32 | -------------------------------------------------------------------------------- /faces/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6-slim-stretch 2 | 3 | RUN apt-get -y update 4 | RUN apt-get install -y --fix-missing \ 5 | build-essential \ 6 | cmake \ 7 | gfortran \ 8 | git \ 9 | wget \ 10 | curl \ 11 | graphicsmagick \ 12 | libgraphicsmagick1-dev \ 13 | libatlas-dev \ 14 | libavcodec-dev \ 15 | libavformat-dev \ 16 | libgtk2.0-dev \ 17 | libjpeg-dev \ 18 | liblapack-dev \ 19 | libswscale-dev \ 20 | pkg-config \ 21 | python3-dev \ 22 | python3-numpy \ 23 | software-properties-common \ 24 | zip \ 25 | && apt-get clean && rm -rf /tmp/* /var/tmp/* 26 | 27 | RUN cd ~ && \ 28 | mkdir -p dlib && \ 29 | git clone -b 'v19.9' --single-branch https://github.com/davisking/dlib.git dlib/ && \ 30 | cd dlib/ && \ 31 | python3 setup.py install --yes USE_AVX_INSTRUCTIONS 32 | 33 | WORKDIR /src 34 | 35 | COPY requirements.txt . 36 | 37 | RUN pip install --no-cache-dir -r requirements.txt 38 | 39 | COPY recognizer_pb2_grpc.py . 40 | COPY recognizer_pb2.py . 41 | COPY recognizer.py . 42 | COPY metrics.py . 43 | COPY server.py . 44 | 45 | CMD [ "python", "server.py" ] 46 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # API 2 | gen-server: 3 | protoc --go_out=plugins=grpc:. api/proto/usersearch.proto 4 | 5 | gen-client: 6 | protoc -I=api/proto/ usersearch.proto \ 7 | --js_out=import_style=commonjs:frontend/src/protofiles/ \ 8 | --grpc-web_out=import_style=commonjs,mode=grpcwebtext:frontend/src/protofiles/ 9 | 10 | gen-faces: 11 | protoc --go_out=plugins=grpc:. faces/proto/recognizer.proto 12 | 13 | # INSTAGRAM 14 | 15 | INSTAGRAM_COMPOSE_FILE:=insta-compose.yml 16 | 17 | run-instagram: 18 | docker-compose -f $(INSTAGRAM_COMPOSE_FILE) up -d --build es-with-plugin zookeeper my-kafka postgres connect minio neo4j 19 | sleep 5 20 | docker-compose -f $(INSTAGRAM_COMPOSE_FILE) up --build migrate-postgres 21 | docker-compose -f $(INSTAGRAM_COMPOSE_FILE) up -d --build 22 | docker-compose -f $(INSTAGRAM_COMPOSE_FILE) logs -f 23 | 24 | 25 | # TWITTER 26 | 27 | TWITTER_COMPOSE_FILE:=twitter-compose.yml 28 | 29 | run-twitter: 30 | docker-compose -f $(TWITTER_COMPOSE_FILE) up -d my-kafka postgres connect 31 | sleep 5 32 | docker-compose -f $(TWITTER_COMPOSE_FILE) up -d --build 33 | docker-compose -f $(TWITTER_COMPOSE_FILE) logs -f 34 | -------------------------------------------------------------------------------- /frontend/src/pages/endscreen.jsx: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | import { withRouter } from "react-router"; 3 | import Button from "../components/Button"; 4 | import "./../index.css"; 5 | import H1 from "../components/H1"; 6 | import H2 from "../components/H2"; 7 | 8 | class EndScreen extends Component { 9 | render() { 10 | return ( 11 |
12 |
13 |
14 |

15 | Think again about whether all this data about you should be 16 | visible to everyone! 17 |

18 |

If not, we want to give you 3 important tips.

19 |
20 |

21 | 1. Think twice about what information you want to make public. 22 |

23 |

2. Check your private settings again.

24 |

3. Switch your profile to private.

25 |
26 | 27 |
28 |
29 |
30 | ); 31 | } 32 | } 33 | 34 | export default EndScreen; 35 | -------------------------------------------------------------------------------- /faces/recognitiontest/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "github.com/codeuniversity/smag-mvp/faces/proto" 7 | "github.com/codeuniversity/smag-mvp/imgproxy" 8 | "google.golang.org/grpc" 9 | ) 10 | 11 | func main() { 12 | con, err := grpc.Dial("localhost:6666", grpc.WithInsecure()) 13 | if err != nil { 14 | panic(err) 15 | } 16 | c := proto.NewFaceRecognizerClient(con) 17 | response, err := c.RecognizeFaces(context.Background(), &proto.RecognizeRequest{ 18 | Url: "https://cdn.madaracosmetics.com/media/catalog/category/FACE_OK_3.jpg", 19 | }) 20 | if err != nil { 21 | panic(err) 22 | } 23 | p, err := imgproxy.New("localhost:8080", "5800c215e5cd5110365c390e83752526fa40758efa4dcc406e3a4fdd6e22877c", "520f986b998545b4785e0defbc4f3c1203f22de2374a3d53cb7a7fe9fea309c5") 24 | if err != nil { 25 | panic(err) 26 | } 27 | faces := response.Faces 28 | for _, face := range faces { 29 | fmt.Println(face) 30 | x := int(face.X) 31 | y := int(face.Y) 32 | width := int(face.Width) 33 | height := int(face.Height) 34 | url := p.GetCropURL(x, y, width, height, "https://cdn.madaracosmetics.com/media/catalog/category/FACE_OK_3.jpg") 35 | fmt.Println(url) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /twitter/scraper/twitterscraper/insert_seed.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | from time import sleep 5 | 6 | from kafka import KafkaProducer 7 | 8 | 9 | def main(): 10 | kafka_host_port = os.getenv("KAFKA_HOST_PORT", "localhost:9092") 11 | seed_name = os.getenv("SEED_NAME", "wpbdry") 12 | insert_topic = os.getenv("KAFKA_INSERT_TOPIC", "user_names") 13 | wait = int(os.getenv("SLEEP_SECONDS", "0")) 14 | 15 | logging.info(f"sleep for {wait} seconds") 16 | sleep(wait) 17 | 18 | producer = KafkaProducer( 19 | bootstrap_servers=kafka_host_port, 20 | value_serializer=lambda v: json.dumps(v).encode('utf-8'), 21 | reconnect_backoff_ms=500, 22 | reconnect_backoff_max_ms=5000, 23 | ) 24 | 25 | logging.info(f"sleep for {wait} seconds") 26 | sleep(wait) 27 | 28 | logging.info(f"Send user_name {seed_name} to kafka/{insert_topic}") 29 | producer.send(insert_topic, seed_name) 30 | producer.flush() 31 | 32 | 33 | if __name__ == "__main__": 34 | logging.basicConfig( 35 | format="%(asctime)s.%(msecs)03d - %(module)s - %(levelname)s - %(message)s", 36 | datefmt="%H:%M:%S", 37 | level=logging.INFO, 38 | ) 39 | main() 40 | -------------------------------------------------------------------------------- /api/grpcserver/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | server "github.com/codeuniversity/smag-mvp/api/grpcserver" 5 | "github.com/codeuniversity/smag-mvp/config" 6 | "github.com/codeuniversity/smag-mvp/kafka" 7 | "github.com/codeuniversity/smag-mvp/utils" 8 | kgo "github.com/segmentio/kafka-go" 9 | ) 10 | 11 | func main() { 12 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "") 13 | namesTopic := utils.GetStringFromEnvWithDefault("KAFKA_NAME_TOPIC", "") 14 | grpcPort := utils.GetStringFromEnvWithDefault("GRPC_PORT", "10000") 15 | uploadBucket := utils.MustGetStringFromEnv("S3_UPLOAD_BUCKET_NAME") 16 | esHosts := utils.GetMultipleStringsFromEnvWithDefault("ES_HOSTS", []string{"http://localhost:9200"}) 17 | recognitionServiceAddress := utils.MustGetStringFromEnv("RECOGNITION_SERVICE_ADDRESS") 18 | s3Config := config.GetS3Config() 19 | postgresConfig := config.GetPostgresConfig() 20 | 21 | var writer *kgo.Writer 22 | if kafkaAddress != "" && namesTopic != "" { 23 | writer = kafka.NewWriter(kafka.NewWriterConfig(kafkaAddress, namesTopic, false)) 24 | } 25 | 26 | s := server.NewGrpcServer(grpcPort, writer, s3Config, uploadBucket, postgresConfig, esHosts, recognitionServiceAddress) 27 | 28 | s.Listen() 29 | } 30 | -------------------------------------------------------------------------------- /insta/inserter/neo4j/posts/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13-alpine as builder 2 | RUN apk add --no-cache ca-certificates cmake make g++ openssl-dev openssl-libs-static git curl pkgconfig 3 | # clone seabolt-1.7.0 source code 4 | RUN git clone -b v1.7.4 https://github.com/neo4j-drivers/seabolt.git /seabolt 5 | # invoke cmake build and install artifacts - default location is /usr/local 6 | WORKDIR /seabolt/build 7 | # CMAKE_INSTALL_LIBDIR=lib is a hack where we override default lib64 to lib to workaround a defect 8 | # in our generated pkg-config file 9 | RUN cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_LIBDIR=lib .. && cmake --build . --target install 10 | RUN curl -sSL "https://github.com/gotestyourself/gotestsum/releases/download/v0.3.1/gotestsum_0.3.1_linux_amd64.tar.gz" | tar -xz -C /usr/local/bin gotestsum 11 | 12 | WORKDIR /app 13 | COPY go.mod . 14 | COPY go.sum . 15 | RUN go mod download 16 | COPY . . 17 | RUN GOOS=linux go build --tags seabolt_static -o neo4j_posts-inserter insta/inserter/neo4j/posts/main.go 18 | 19 | FROM alpine 20 | RUN apk --no-cache add ca-certificates 21 | RUN mkdir /app 22 | COPY http_header-generator/useragents.json /app 23 | COPY --from=builder /app/neo4j_posts-inserter /app 24 | WORKDIR /app 25 | CMD ["./neo4j_posts-inserter"] 26 | -------------------------------------------------------------------------------- /insta/inserter/neo4j/user/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13-alpine as builder 2 | RUN apk add --no-cache ca-certificates cmake make g++ openssl-dev openssl-libs-static git curl pkgconfig 3 | # clone seabolt-1.7.0 source code 4 | RUN git clone -b v1.7.4 https://github.com/neo4j-drivers/seabolt.git /seabolt 5 | # invoke cmake build and install artifacts - default location is /usr/local 6 | WORKDIR /seabolt/build 7 | # CMAKE_INSTALL_LIBDIR=lib is a hack where we override default lib64 to lib to workaround a defect 8 | # in our generated pkg-config file 9 | RUN cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_LIBDIR=lib .. && cmake --build . --target install 10 | RUN curl -sSL "https://github.com/gotestyourself/gotestsum/releases/download/v0.3.1/gotestsum_0.3.1_linux_amd64.tar.gz" | tar -xz -C /usr/local/bin gotestsum 11 | 12 | WORKDIR /app 13 | COPY go.mod . 14 | COPY go.sum . 15 | RUN go mod download 16 | COPY . . 17 | RUN GOOS=linux go build --tags seabolt_static -o neo4j_user-inserter insta/inserter/neo4j/user/main.go 18 | 19 | 20 | FROM alpine 21 | RUN apk --no-cache add ca-certificates 22 | RUN mkdir /app 23 | COPY http_header-generator/useragents.json /app 24 | COPY --from=builder /app/neo4j_user-inserter /app 25 | WORKDIR /app 26 | CMD ["./neo4j_user-inserter"] 27 | -------------------------------------------------------------------------------- /scraper-client/simple-scraper-client.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | "net/http" 5 | "time" 6 | 7 | generator "github.com/codeuniversity/smag-mvp/http_header-generator" 8 | ) 9 | 10 | // SimpleScraperClient handles retries and setting random headers for scraping 11 | type SimpleScraperClient struct { 12 | currentAddress string 13 | client *http.Client 14 | instanceID string 15 | *generator.HTTPHeaderGenerator 16 | } 17 | 18 | // NewSimpleScraperClient returns an initialized SimpleScraperClient 19 | func NewSimpleScraperClient() *SimpleScraperClient { 20 | client := &SimpleScraperClient{} 21 | client.HTTPHeaderGenerator = generator.New() 22 | client.client = &http.Client{} 23 | return client 24 | } 25 | 26 | // WithRetries calls f with retries 27 | func (s *SimpleScraperClient) WithRetries(times int, f func() error) error { 28 | var err error 29 | for i := 0; i < times; i++ { 30 | err = f() 31 | 32 | if err == nil { 33 | return nil 34 | } 35 | time.Sleep(100 * time.Millisecond) 36 | } 37 | return err 38 | } 39 | 40 | // Do the request with correct headers 41 | func (s *SimpleScraperClient) Do(request *http.Request) (*http.Response, error) { 42 | s.AddHeaders(&request.Header) 43 | return s.client.Do(request) 44 | } 45 | -------------------------------------------------------------------------------- /insta/inserter/neo4j/tagged_users/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.13-alpine as builder 2 | RUN apk add --no-cache ca-certificates cmake make g++ openssl-dev openssl-libs-static git curl pkgconfig 3 | # clone seabolt-1.7.0 source code 4 | RUN git clone -b v1.7.4 https://github.com/neo4j-drivers/seabolt.git /seabolt 5 | # invoke cmake build and install artifacts - default location is /usr/local 6 | WORKDIR /seabolt/build 7 | # CMAKE_INSTALL_LIBDIR=lib is a hack where we override default lib64 to lib to workaround a defect 8 | # in our generated pkg-config file 9 | RUN cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_LIBDIR=lib .. && cmake --build . --target install 10 | RUN curl -sSL "https://github.com/gotestyourself/gotestsum/releases/download/v0.3.1/gotestsum_0.3.1_linux_amd64.tar.gz" | tar -xz -C /usr/local/bin gotestsum 11 | 12 | WORKDIR /app 13 | COPY go.mod . 14 | COPY go.sum . 15 | RUN go mod download 16 | COPY . . 17 | RUN GOOS=linux go build --tags seabolt_static -o neo4j_user-inserter insta/inserter/neo4j/tagged_users/main.go 18 | 19 | FROM alpine 20 | RUN apk --no-cache add ca-certificates 21 | RUN mkdir /app 22 | COPY http_header-generator/useragents.json /app 23 | COPY --from=builder /app/neo4j_user-inserter /app 24 | WORKDIR /app 25 | CMD ["./neo4j_user-inserter"] 26 | -------------------------------------------------------------------------------- /insta/filter/user_names/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | 6 | "github.com/codeuniversity/smag-mvp/kafka/changestream" 7 | "github.com/codeuniversity/smag-mvp/service" 8 | "github.com/codeuniversity/smag-mvp/utils" 9 | 10 | "github.com/segmentio/kafka-go" 11 | ) 12 | 13 | func main() { 14 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "my-kafka:9092") 15 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID") 16 | changesTopic := utils.GetStringFromEnvWithDefault("KAFKA_CHANGE_TOPIC", "postgres.public.users") 17 | namesTopic := utils.GetStringFromEnvWithDefault("KAFKA_NAME_TOPIC", "user_names") 18 | 19 | f := changestream.NewFilter(kafkaAddress, groupID, changesTopic, namesTopic, filterChange) 20 | 21 | service.CloseOnSignal(f) 22 | waitUntilClosed := f.Start() 23 | 24 | waitUntilClosed() 25 | } 26 | 27 | type user struct { 28 | ID int `json:"id"` 29 | UserName string `json:"user_name"` 30 | } 31 | 32 | func filterChange(m *changestream.ChangeMessage) ([]kafka.Message, error) { 33 | if m.Payload.Op != "c" { 34 | return nil, nil 35 | } 36 | 37 | u := &user{} 38 | err := json.Unmarshal(m.Payload.After, u) 39 | if err != nil { 40 | return nil, err 41 | } 42 | 43 | return []kafka.Message{{Value: []byte(u.UserName)}}, nil 44 | } 45 | -------------------------------------------------------------------------------- /twitter/filter/user_names/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | 6 | "github.com/codeuniversity/smag-mvp/kafka/changestream" 7 | "github.com/codeuniversity/smag-mvp/service" 8 | "github.com/codeuniversity/smag-mvp/utils" 9 | "github.com/segmentio/kafka-go" 10 | ) 11 | 12 | func main() { 13 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "my-kafka:9092") 14 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID") 15 | changesTopic := utils.GetStringFromEnvWithDefault("KAFKA_CHANGE_TOPIC", "postgres.public.twitter_users") 16 | namesTopic := utils.GetStringFromEnvWithDefault("KAFKA_NAME_TOPIC", "twitter-user_names") 17 | 18 | f := changestream.NewFilter(kafkaAddress, groupID, changesTopic, namesTopic, filterChange) 19 | 20 | service.CloseOnSignal(f) 21 | waitUntilClose := f.Start() 22 | 23 | waitUntilClose() 24 | } 25 | 26 | type user struct { 27 | ID int `json:"id"` 28 | Username string `json:"username"` 29 | } 30 | 31 | func filterChange(m *changestream.ChangeMessage) ([]kafka.Message, error) { 32 | if m.Payload.Op != "c" { 33 | return nil, nil 34 | } 35 | 36 | u := &user{} 37 | err := json.Unmarshal(m.Payload.After, u) 38 | if err != nil { 39 | return nil, err 40 | } 41 | 42 | return []kafka.Message{{Value: []byte(u.Username)}}, nil 43 | } 44 | -------------------------------------------------------------------------------- /nlp/frequency-analyzer/cities.json: -------------------------------------------------------------------------------- 1 | { 2 | "berlin": [ 3 | "berlin", 4 | "BLN", 5 | "Brandenburg Gate", 6 | "Brandenburger Tor", 7 | "Bundestag" 8 | ], 9 | "london": [ 10 | "london", 11 | "LDN", 12 | "Big Ben", 13 | "Tower Bridge" 14 | ], 15 | "munich": [ 16 | "munich", 17 | "münchen", 18 | "MUC" 19 | ], 20 | "hamburg": [ 21 | "hamburg", 22 | "⚓" 23 | ], 24 | "paris": [ 25 | "paris", 26 | "Eiffel Tower", 27 | "Eiffelturm", 28 | "cdg" 29 | ], 30 | "rome": [ 31 | "rome", 32 | "rom", 33 | "colosseum" 34 | ], 35 | "amsterdam": [ 36 | "amsterdam", 37 | "AMS" 38 | ], 39 | "barcelona": [ 40 | "barcelona", 41 | "BCN" 42 | ], 43 | "copenhagen": [ 44 | "copenhagen", 45 | "kopenhagen", 46 | "cph" 47 | ], 48 | "lisbon": [ 49 | "lisbon", 50 | "lissabon", 51 | "lis" 52 | ], 53 | "vienna": [ 54 | "vienna", 55 | "wien", 56 | "VIE" 57 | ], 58 | "prague": [ 59 | "prague", 60 | "prag", 61 | "PRG" 62 | ], 63 | "madrid": [ 64 | "madrid", 65 | "mad" 66 | ] 67 | } -------------------------------------------------------------------------------- /cli/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "time" 8 | 9 | "github.com/codeuniversity/smag-mvp/utils" 10 | "github.com/segmentio/kafka-go" 11 | ) 12 | 13 | func main() { 14 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "my-kafka:9092") 15 | instagramTopic := utils.GetStringFromEnvWithDefault("KAFKA_INSTAGRAM_TOPIC", "user_names") 16 | twitterTopic := utils.GetStringFromEnvWithDefault("KAFKA_TWITTER_TOPIC", "twitter.scraped.user_names") 17 | 18 | if len(os.Args) < 3 { 19 | panic("Invalid argumemts. Usage: cli ") 20 | } 21 | 22 | platformArg := os.Args[1] 23 | userNameArg := os.Args[2] 24 | 25 | var topic string 26 | switch platformArg { 27 | case "instagram": 28 | topic = instagramTopic 29 | break 30 | case "twitter": 31 | topic = twitterTopic 32 | break 33 | default: 34 | panic(fmt.Sprintf("Invalid platform option: %s\n", platformArg)) 35 | } 36 | 37 | w := kafka.NewWriter(kafka.WriterConfig{ 38 | Brokers: []string{kafkaAddress}, 39 | Topic: topic, 40 | Balancer: &kafka.LeastBytes{}, 41 | }) 42 | defer w.Close() 43 | t, cancel := context.WithTimeout(context.Background(), time.Second*10) 44 | defer cancel() 45 | err := w.WriteMessages(t, kafka.Message{ 46 | Value: []byte(userNameArg), 47 | }) 48 | utils.PanicIfNotNil(err) 49 | } 50 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/codeuniversity/smag-mvp 2 | 3 | go 1.13 4 | 5 | require ( 6 | github.com/PuerkitoBio/goquery v1.5.0 // indirect 7 | github.com/antchfx/htmlquery v1.1.0 // indirect 8 | github.com/antchfx/xmlquery v1.1.0 // indirect 9 | github.com/antchfx/xpath v1.1.0 // indirect 10 | github.com/aws/aws-sdk-go v1.25.16 11 | github.com/elastic/go-elasticsearch/v7 v7.4.1 12 | github.com/go-ini/ini v1.51.0 // indirect 13 | github.com/gobwas/glob v0.2.3 // indirect 14 | github.com/gocolly/colly v1.2.0 15 | github.com/golang/protobuf v1.3.2 16 | github.com/google/uuid v1.1.1 17 | github.com/jinzhu/gorm v1.9.11 18 | github.com/johnnadratowski/golang-neo4j-bolt-driver v0.0.0-20181101021923-6b24c0085aae 19 | github.com/kennygrant/sanitize v1.2.4 // indirect 20 | github.com/kr/pretty v0.1.0 // indirect 21 | github.com/lib/pq v1.2.0 22 | github.com/minio/minio-go v6.0.14+incompatible 23 | github.com/minio/minio-go/v6 v6.0.39 24 | github.com/neo4j-drivers/gobolt v1.7.4 // indirect 25 | github.com/neo4j/neo4j-go-driver v1.7.4 26 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect 27 | github.com/segmentio/kafka-go v0.3.4 28 | github.com/stretchr/testify v1.4.0 29 | github.com/temoto/robotstxt v1.1.1 // indirect 30 | gocv.io/x/gocv v0.21.0 31 | golang.org/x/net v0.0.0-20191014212845-da9a3fd4c582 32 | google.golang.org/grpc v1.24.0 33 | ) 34 | -------------------------------------------------------------------------------- /insta/models/faces.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/jinzhu/gorm" 5 | "github.com/jinzhu/gorm/dialects/postgres" 6 | ) 7 | 8 | // FaceData represents the face encoding table 9 | type FaceData struct { 10 | gorm.Model 11 | PostID int `json:"post_id"` 12 | X int `json:"x"` 13 | Y int `json:"y"` 14 | Width int `json:"width"` 15 | Height int `json:"height"` 16 | Encoding postgres.Jsonb `json:"encoding"` 17 | } 18 | 19 | // FaceRecognitionResult is the result of the face recognizer 20 | type FaceRecognitionResult struct { 21 | PostID int `json:"post_id"` 22 | Faces []*Face `json:"faces"` 23 | } 24 | 25 | // FaceReconJob represents the data for a face recon job 26 | type FaceReconJob struct { 27 | PostID int `json:"post_id"` 28 | InternalImageURL string `json:"internal_image_url"` 29 | X int `json:"x"` 30 | Y int `json:"y"` 31 | Width int `json:"width"` 32 | Height int `json:"height"` 33 | } 34 | 35 | // Face contains the position of a face in a post and its encoding 36 | type Face struct { 37 | X int `json:"x"` 38 | Y int `json:"y"` 39 | Width int `json:"width"` 40 | Height int `json:"height"` 41 | Encoding [128]float32 `json:"encoding"` 42 | } 43 | -------------------------------------------------------------------------------- /face-recognition/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | recognition "github.com/codeuniversity/smag-mvp/face-recognition" 5 | "github.com/codeuniversity/smag-mvp/kafka" 6 | "github.com/codeuniversity/smag-mvp/service" 7 | "github.com/codeuniversity/smag-mvp/utils" 8 | ) 9 | 10 | func main() { 11 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "127.0.0.1:9092") 12 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID") 13 | jobsReadTopic := utils.MustGetStringFromEnv("KAFKA_PICTURE_FACE_RECONED_TOPIC") 14 | jobsWriteTopic := utils.MustGetStringFromEnv("KAFKA_FACE_DETECTION_RESULTS_TOPIC") 15 | faceRecognizerAddress := utils.MustGetStringFromEnv("FACE_RECOGNIZER_ADDRESS") 16 | pictureBucketName := utils.MustGetStringFromEnv("S3_PICTURE_BUCKET_NAME") 17 | imgProxyAddress := utils.MustGetStringFromEnv("IMGPROXY_ADDRESS") 18 | imgProxyKey := utils.MustGetStringFromEnv("IMGPROXY_KEY") 19 | imgProxySalt := utils.MustGetStringFromEnv("IMGPROXY_SALT") 20 | qReader := kafka.NewReader(kafka.NewReaderConfig(kafkaAddress, groupID, jobsReadTopic)) 21 | qWriter := kafka.NewWriter(kafka.NewWriterConfig(kafkaAddress, jobsWriteTopic, true)) 22 | 23 | r := recognition.New(qReader, qWriter, faceRecognizerAddress, pictureBucketName, imgProxyAddress, imgProxyKey, imgProxySalt) 24 | 25 | service.CloseOnSignal(r) 26 | waitUntilDone := r.Start() 27 | 28 | waitUntilDone() 29 | } 30 | -------------------------------------------------------------------------------- /insta/inserter/neo4j/user/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | 6 | "github.com/codeuniversity/smag-mvp/kafka" 7 | "github.com/codeuniversity/smag-mvp/kafka/changestream" 8 | neo4jinserter "github.com/codeuniversity/smag-mvp/neo4j/inserter" 9 | "github.com/codeuniversity/smag-mvp/service" 10 | "github.com/codeuniversity/smag-mvp/utils" 11 | 12 | "github.com/neo4j/neo4j-go-driver/neo4j" 13 | ) 14 | 15 | func main() { 16 | readerConfig := kafka.GetInserterConfig() 17 | neo4jConfig := utils.GetNeo4jConfig() 18 | 19 | i := neo4jinserter.New(neo4jConfig, kafka.NewReader(readerConfig), insertUsersAndFollowings) 20 | 21 | service.CloseOnSignal(i) 22 | waitUntilClosed := i.Start() 23 | 24 | waitUntilClosed() 25 | } 26 | 27 | type Follow struct { 28 | FromID int `json:"from_id"` 29 | ToID int `json:"to_id"` 30 | } 31 | 32 | func insertUsersAndFollowings(m *changestream.ChangeMessage, session neo4j.Session) error { 33 | const createUsersAndRelationships = ` 34 | MERGE(u1:USER{id: $fromID}) 35 | MERGE(u2:USER{id: $toID}) 36 | MERGE(u1)-[:FOLLOWS]->(u2) 37 | ` 38 | f := &Follow{} 39 | err := json.Unmarshal(m.Payload.After, f) 40 | 41 | if err != nil { 42 | return err 43 | } 44 | 45 | _, err = session.Run(createUsersAndRelationships, map[string]interface{}{"fromID": f.FromID, "toID": f.ToID}) 46 | 47 | if err != nil { 48 | return err 49 | } 50 | 51 | return nil 52 | } 53 | -------------------------------------------------------------------------------- /insta/inserter/neo4j/posts/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | 6 | "github.com/codeuniversity/smag-mvp/kafka" 7 | "github.com/codeuniversity/smag-mvp/kafka/changestream" 8 | neo4jinserter "github.com/codeuniversity/smag-mvp/neo4j/inserter" 9 | "github.com/codeuniversity/smag-mvp/service" 10 | "github.com/codeuniversity/smag-mvp/utils" 11 | 12 | "github.com/neo4j/neo4j-go-driver/neo4j" 13 | ) 14 | 15 | func main() { 16 | readerConfig := kafka.GetInserterConfig() 17 | neo4jConfig := utils.GetNeo4jConfig() 18 | 19 | i := neo4jinserter.New(neo4jConfig, kafka.NewReader(readerConfig), insertPostsAndAddRelationship) 20 | 21 | service.CloseOnSignal(i) 22 | waitUntilClosed := i.Start() 23 | 24 | waitUntilClosed() 25 | } 26 | 27 | type Post struct { 28 | UserID int `json:"user_id"` 29 | PostID int `json:"id"` 30 | } 31 | 32 | func insertPostsAndAddRelationship(m *changestream.ChangeMessage, session neo4j.Session) error { 33 | const insertPostsAndAddRelationship = ` 34 | MERGE(u:USER{id: $userID}) 35 | MERGE(p:POST{id: $postID}) 36 | MERGE(u)-[:POSTED]->(p) 37 | ` 38 | p := &Post{} 39 | err := json.Unmarshal(m.Payload.After, p) 40 | 41 | if err != nil { 42 | return err 43 | } 44 | 45 | _, err = session.Run(insertPostsAndAddRelationship, map[string]interface{}{"userID": p.UserID, "postID": p.PostID}) 46 | 47 | if err != nil { 48 | return err 49 | } 50 | 51 | return nil 52 | } 53 | -------------------------------------------------------------------------------- /elastic/mapping.go: -------------------------------------------------------------------------------- 1 | package elastic 2 | 3 | const CommentsIndexMapping = ` 4 | { 5 | "mappings" : { 6 | "properties" : { 7 | "comment" : { 8 | "type" : "text" 9 | }, 10 | "post_id" : { 11 | "type" : "integer" 12 | } 13 | } 14 | } 15 | } 16 | ` 17 | 18 | const FacesIndexMapping = ` 19 | { 20 | "mappings" : { 21 | "properties" : { 22 | "encoding_vector": { 23 | "type": "binary", 24 | "doc_values": true 25 | }, 26 | "post_id": { 27 | "type": "integer" 28 | }, 29 | "x": { 30 | "type": "integer" 31 | }, 32 | "y": { 33 | "type": "integer" 34 | }, 35 | "width": { 36 | "type": "integer" 37 | }, 38 | "height":{ 39 | "type": "integer" 40 | } 41 | } 42 | } 43 | } 44 | ` 45 | 46 | const PostsIndexMapping = ` 47 | { 48 | "mappings" : { 49 | "properties" : { 50 | "caption" : { 51 | "type" : "text" 52 | }, 53 | "user_id" : { 54 | "type" : "integer" 55 | } 56 | } 57 | } 58 | } 59 | ` 60 | 61 | const UsersIndexMapping = ` 62 | { 63 | "mappings" : { 64 | "properties" : { 65 | "id": { 66 | "type": "integer" 67 | } 68 | "user_name": { 69 | "type": "text" 70 | } 71 | "real_name": { 72 | "type": "text" 73 | } 74 | "bio": { 75 | "type": "text" 76 | } 77 | } 78 | } 79 | } 80 | ` 81 | -------------------------------------------------------------------------------- /insta/inserter/neo4j/tagged_users/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | 6 | "github.com/codeuniversity/smag-mvp/kafka" 7 | "github.com/codeuniversity/smag-mvp/kafka/changestream" 8 | neo4jinserter "github.com/codeuniversity/smag-mvp/neo4j/inserter" 9 | "github.com/codeuniversity/smag-mvp/service" 10 | "github.com/codeuniversity/smag-mvp/utils" 11 | 12 | "github.com/neo4j/neo4j-go-driver/neo4j" 13 | ) 14 | 15 | func main() { 16 | 17 | readerConfig := kafka.GetInserterConfig() 18 | neo4jConfig := utils.GetNeo4jConfig() 19 | 20 | i := neo4jinserter.New(neo4jConfig, kafka.NewReader(readerConfig), addTaggedUsersRelationship) 21 | 22 | service.CloseOnSignal(i) 23 | waitUntilClosed := i.Start() 24 | 25 | waitUntilClosed() 26 | } 27 | 28 | type taggedUser struct { 29 | UserID int `json:"user_id"` 30 | PostID int `json:"post_id"` 31 | } 32 | 33 | func addTaggedUsersRelationship(m *changestream.ChangeMessage, session neo4j.Session) error { 34 | const addTaggedRelationship = ` 35 | MERGE(u:USER{id: $userID}) 36 | MERGE(p:POST{id: $postID}) 37 | MERGE(u)-[:TAGGED_ON]->(p) 38 | ` 39 | t := &taggedUser{} 40 | err := json.Unmarshal(m.Payload.After, t) 41 | 42 | if err != nil { 43 | return err 44 | } 45 | 46 | _, err = session.Run(addTaggedRelationship, map[string]interface{}{"userID": t.UserID, "postID": t.PostID}) 47 | 48 | if err != nil { 49 | return err 50 | } 51 | 52 | return nil 53 | } 54 | -------------------------------------------------------------------------------- /kafka/changestream/change_message.go: -------------------------------------------------------------------------------- 1 | package changestream 2 | 3 | import ( 4 | "encoding/json" 5 | ) 6 | 7 | // ChangeMessage ... 8 | type ChangeMessage struct { 9 | Schema struct { 10 | Type string `json:"type"` 11 | Fields []struct { 12 | Type string `json:"type"` 13 | Fields []struct { 14 | Type string `json:"type"` 15 | Optional bool `json:"optional"` 16 | Field string `json:"field"` 17 | } `json:"fields,omitempty"` 18 | Optional bool `json:"optional"` 19 | Name string `json:"name,omitempty"` 20 | Field string `json:"field"` 21 | } `json:"fields"` 22 | Optional bool `json:"optional"` 23 | Name string `json:"name"` 24 | } `json:"schema"` 25 | Payload struct { 26 | Before json.RawMessage `json:"before"` 27 | After json.RawMessage `json:"after"` 28 | Source struct { 29 | Version string `json:"version"` 30 | Connector string `json:"connector"` 31 | Name string `json:"name"` 32 | TsMs int64 `json:"ts_ms"` 33 | Snapshot string `json:"snapshot"` 34 | Db string `json:"db"` 35 | Schema string `json:"schema"` 36 | Table string `json:"table"` 37 | TxID int `json:"txId"` 38 | Lsn int `json:"lsn"` 39 | Xmin interface{} `json:"xmin"` 40 | } `json:"source"` 41 | Op string `json:"op"` 42 | TsMs int64 `json:"ts_ms"` 43 | } `json:"payload"` 44 | } 45 | -------------------------------------------------------------------------------- /twitter/scraper/twitterscraper/posts_scraper.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import twint 4 | 5 | from .scraper_manager import ScraperManager 6 | from .utils import get_conf 7 | 8 | 9 | class TweetsScraper(ScraperManager): 10 | name = "tweets_scraper" 11 | 12 | @staticmethod 13 | def scrape(user_name: str): 14 | logging.info(f"Scrape tweets of user {user_name}") 15 | 16 | tweets = [] 17 | 18 | c = get_conf(user_name) 19 | c.Store_object_tweets_list = tweets 20 | 21 | twint.run.Search(c) 22 | return tweets 23 | 24 | 25 | if __name__ == "__main__": 26 | import os 27 | 28 | log_level = logging.DEBUG if os.getenv("DEBUG", "false") == "true" else logging.INFO 29 | 30 | logging.basicConfig( 31 | format="%(asctime)s.%(msecs)03d - %(module)s - %(levelname)s - %(message)s", 32 | datefmt="%H:%M:%S", 33 | level=log_level, 34 | ) 35 | 36 | insert_topic = os.getenv("KAFKA_INSERT_TOPIC", "users_scraped") 37 | fetch_topic = os.getenv("KAFKA_FETCH_TOPIC", "user_names") 38 | kafka_consumer_group = os.getenv("KAFKA_CONSUMER_GROUP", "tweets_scraper") 39 | kafka_address = os.getenv("KAFKA_ADDRESS", "localhost:9092") 40 | 41 | tweets_scraper = TweetsScraper( 42 | insert_topic=insert_topic, 43 | fetch_topic=fetch_topic, 44 | kafka_consumer_group=kafka_consumer_group, 45 | kafka_address=kafka_address, 46 | ) 47 | tweets_scraper.run() 48 | -------------------------------------------------------------------------------- /insta/posts_face-detection/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | detection "github.com/codeuniversity/smag-mvp/insta/posts_face-detection" 5 | "github.com/codeuniversity/smag-mvp/kafka" 6 | "github.com/codeuniversity/smag-mvp/service" 7 | "github.com/codeuniversity/smag-mvp/utils" 8 | ) 9 | 10 | func main() { 11 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "127.0.0.1:9092") 12 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID") 13 | jobsReadTopic := utils.MustGetStringFromEnv("KAFKA_PICTURE_FACE_RECON_TOPIC") 14 | jobsWriteTopic := utils.MustGetStringFromEnv("KAFKA_PICTURE_FACE_RECONED_TOPIC") 15 | qReader := kafka.NewReader(kafka.NewReaderConfig(kafkaAddress, groupID, jobsReadTopic)) 16 | qWriter := kafka.NewWriter(kafka.NewWriterConfig(kafkaAddress, jobsWriteTopic, true)) 17 | 18 | config := detection.Config{ 19 | S3BucketName: utils.GetStringFromEnvWithDefault("S3_BUCKET_NAME", "insta_pics"), 20 | S3Region: utils.GetStringFromEnvWithDefault("S3_REGION", "eu-west-1"), 21 | S3Endpoint: utils.GetStringFromEnvWithDefault("S3_ENDOINT", "127.0.0.1:9000"), 22 | S3AccessKeyID: utils.MustGetStringFromEnv("S3_ACCESS_KEY_ID"), 23 | S3SecretAccessKey: utils.MustGetStringFromEnv("S3_SECRET_ACCESS_KEY"), 24 | S3UseSSL: utils.GetBoolFromEnvWithDefault("S3_USE_SSL", true), 25 | } 26 | 27 | d := detection.New(qReader, qWriter, config) 28 | 29 | service.CloseOnSignal(d) 30 | waitUntilDone := d.Start() 31 | 32 | waitUntilDone() 33 | } 34 | -------------------------------------------------------------------------------- /twitter/scraper/twitterscraper/users_scraper.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import twint 4 | 5 | from .scraper_manager import ScraperManager 6 | from .utils import get_conf 7 | 8 | 9 | def scrape(user_name: str) -> twint.user.user: 10 | conf = get_conf(user_name) 11 | twint.run.Lookup(conf) 12 | user = twint.output.users_list.pop() 13 | return user 14 | 15 | 16 | class UserScraper(ScraperManager): 17 | name = "user_scraper" 18 | 19 | @staticmethod 20 | def scrape(user_name: str): 21 | logging.info(f"Scrape user {user_name}") 22 | user = scrape(user_name) 23 | return user 24 | 25 | 26 | if __name__ == "__main__": 27 | import os 28 | 29 | log_level = logging.DEBUG if os.getenv("DEBUG", "false") == "true" else logging.INFO 30 | 31 | logging.basicConfig( 32 | format="%(asctime)s.%(msecs)03d - %(module)s - %(levelname)s - %(message)s", 33 | datefmt="%H:%M:%S", 34 | level=log_level, 35 | ) 36 | 37 | 38 | fetch_topic = os.getenv("KAFKA_FETCH_TOPIC", "user_names") 39 | insert_topic = os.getenv("KAFKA_INSERT_TOPIC", "users_scraped") 40 | kafka_consumer_group = os.getenv("KAFKA_CONSUMER_GROUP", "user_scraper") 41 | kafka_address = os.getenv("KAFKA_ADDRESS", "localhost:9092") 42 | 43 | user_scraper = UserScraper( 44 | insert_topic=insert_topic, 45 | fetch_topic=fetch_topic, 46 | kafka_consumer_group=kafka_consumer_group, 47 | kafka_address=kafka_address, 48 | ) 49 | user_scraper.run() 50 | -------------------------------------------------------------------------------- /insta/filter/user_names/filter_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/codeuniversity/smag-mvp/kafka/changestream" 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestFilter(t *testing.T) { 11 | validPayloadJSON := []byte("{\"id\":1,\"user_name\":\"TestUser\"}") 12 | invalidPayloadJSON := []byte("{\"id\":\"1\",\"user_name\":\"TestUser\"}") 13 | 14 | t.Run("create event with unmarshable json", func(t *testing.T) { 15 | //create test input 16 | changeMsg := &changestream.ChangeMessage{} 17 | changeMsg.Payload.Op = "c" 18 | changeMsg.Payload.After = validPayloadJSON 19 | 20 | kMessages, err := filterChange(changeMsg) 21 | 22 | assert.Nil(t, err, "no error") 23 | assert.Equal(t, 1, len(kMessages)) 24 | assert.Equal(t, "TestUser", string(kMessages[0].Value)) 25 | }) 26 | 27 | t.Run("create event with not unmarshable json", func(t *testing.T) { 28 | //create test input 29 | changeMsg := &changestream.ChangeMessage{} 30 | changeMsg.Payload.Op = "c" 31 | changeMsg.Payload.After = invalidPayloadJSON 32 | 33 | kMessages, err := filterChange(changeMsg) 34 | 35 | assert.NotNil(t, err, "error occurs") 36 | assert.Nil(t, kMessages, "nil output") 37 | }) 38 | 39 | t.Run("ignored event", func(t *testing.T) { 40 | //create test input 41 | changeMsg := &changestream.ChangeMessage{} 42 | changeMsg.Payload.Op = "u" 43 | 44 | kMessages, err := filterChange(changeMsg) 45 | 46 | assert.Nil(t, err, "no error") 47 | assert.Nil(t, kMessages, "nil output") 48 | }) 49 | } 50 | -------------------------------------------------------------------------------- /twitter/filter/user_names/filter_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/codeuniversity/smag-mvp/kafka/changestream" 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestFilter(t *testing.T) { 11 | validPayloadJSON := []byte("{\"id\":1,\"username\":\"TestUser\"}") 12 | invalidPayloadJSON := []byte("{\"id\":\"1\",\"username\":\"TestUser\"}") 13 | 14 | t.Run("create event with unmarshable json", func(t *testing.T) { 15 | //create test input 16 | changeMsg := &changestream.ChangeMessage{} 17 | changeMsg.Payload.Op = "c" 18 | changeMsg.Payload.After = validPayloadJSON 19 | 20 | kMessages, err := filterChange(changeMsg) 21 | 22 | assert.Nil(t, err, "no error") 23 | assert.Equal(t, 1, len(kMessages)) 24 | assert.Equal(t, "TestUser", string(kMessages[0].Value)) 25 | }) 26 | 27 | t.Run("create event with not unmarshable json", func(t *testing.T) { 28 | //create test input 29 | changeMsg := &changestream.ChangeMessage{} 30 | changeMsg.Payload.Op = "c" 31 | changeMsg.Payload.After = invalidPayloadJSON 32 | 33 | kMessages, err := filterChange(changeMsg) 34 | 35 | assert.NotNil(t, err, "error occurs") 36 | assert.Nil(t, kMessages, "nil output") 37 | }) 38 | 39 | t.Run("ignored event", func(t *testing.T) { 40 | //create test input 41 | changeMsg := &changestream.ChangeMessage{} 42 | changeMsg.Payload.Op = "u" 43 | 44 | kMessages, err := filterChange(changeMsg) 45 | 46 | assert.Nil(t, err, "no error") 47 | assert.Nil(t, kMessages, "nil output") 48 | }) 49 | } 50 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Description 2 | 3 | Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change. 4 | 5 | Fixes # (issue) 6 | 7 | ## Type of change 8 | 9 | - [ ] Bug fix (non-breaking change which fixes an issue) 10 | - [ ] New feature (non-breaking change which adds functionality) 11 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) 12 | - [ ] This change requires a documentation update 13 | - [ ] Cleanup (changes in structure but not functionality) 14 | 15 | # How Has This Been Tested? 16 | 17 | Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration 18 | 19 | - [ ] Test A 20 | - [ ] Test B 21 | 22 | **Test Configuration**: 23 | * Go version: 24 | * Kafka version: 25 | * Kubernetes version: 26 | 27 | # Checklist: 28 | 29 | - [ ] My code follows the style guidelines of this project 30 | - [ ] I have performed a self-review of my own code 31 | - [ ] I have commented my code, particularly in hard-to-understand areas 32 | - [ ] I have made corresponding changes to the documentation 33 | - [ ] My changes generate no new warnings 34 | - [ ] I have added tests that prove my fix is effective or that my feature works 35 | - [ ] New and existing unit tests pass locally with my changes 36 | - [ ] Any dependent changes have been merged and published in downstream modules 37 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "react-tutorial", 3 | "version": "0.1.0", 4 | "private": true, 5 | "dependencies": { 6 | "@fortawesome/fontawesome-svg-core": "^1.2.25", 7 | "@fortawesome/free-solid-svg-icons": "^5.11.2", 8 | "@fortawesome/react-fontawesome": "^0.1.7", 9 | "@improbable-eng/grpc-web": "^0.11.0", 10 | "@types/google-protobuf": "^3.7.2", 11 | "bootstrap": "^4.3.1", 12 | "google-protobuf": "^3.11.0", 13 | "grpc-web": "^1.0.6", 14 | "lodash": "^4.17.15", 15 | "neovis.js": "^1.2.1", 16 | "prop-types": "^15.7.2", 17 | "react": "^16.10.1", 18 | "react-bootstrap": "^1.0.0-beta.12", 19 | "react-dom": "^16.10.1", 20 | "react-graph-vis": "^1.0.5", 21 | "react-router": "^5.1.2", 22 | "react-router-dom": "^5.1.2", 23 | "react-scripts": "3.3.0", 24 | "react-slideshow-image": "^1.3.1" 25 | }, 26 | "scripts": { 27 | "start": "react-scripts start", 28 | "build": "react-scripts build", 29 | "test": "react-scripts test", 30 | "eject": "react-scripts eject" 31 | }, 32 | "eslintConfig": { 33 | "extends": "react-app" 34 | }, 35 | "browserslist": { 36 | "production": [ 37 | ">0.2%", 38 | "not dead", 39 | "not op_mini all" 40 | ], 41 | "development": [ 42 | "last 1 chrome version", 43 | "last 1 firefox version", 44 | "last 1 safari version" 45 | ] 46 | }, 47 | "husky": { 48 | "hooks": { 49 | "pre-commit": "pretty-quick --staged" 50 | } 51 | }, 52 | "devDependencies": { 53 | "husky": "^3.1.0", 54 | "prettier": "1.19.1", 55 | "pretty-quick": "^2.0.1" 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /faces/server.py: -------------------------------------------------------------------------------- 1 | 2 | from concurrent import futures 3 | import time 4 | import grpc 5 | import os 6 | import prometheus_client 7 | 8 | import recognizer_pb2_grpc as grpc_proto 9 | import recognizer_pb2 as proto 10 | import recognizer 11 | import metrics 12 | 13 | 14 | class Servicer(grpc_proto.FaceRecognizerServicer): 15 | RECOGNIZE_HISTOGRAM = metrics.request_latency_histogram.labels( 16 | 'recognize_faces') 17 | 18 | @RECOGNIZE_HISTOGRAM.time() 19 | def RecognizeFaces(self, request, context): 20 | 21 | faces = recognizer.recognize(request.url) 22 | proto_faces = [] 23 | for face in faces: 24 | area = face['area'] 25 | encoding = face['encoding'] 26 | proto_face = proto.Face(x=area['x'], y=area['y'], width=area['width'], 27 | height=area['height'], encoding=encoding) 28 | proto_faces.append(proto_face) 29 | 30 | metrics.request_counter.labels('recognize_faces').inc() 31 | 32 | return proto.RegognizeResponse(faces=proto_faces) 33 | 34 | 35 | def serve(): 36 | 37 | server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) 38 | 39 | grpc_proto.add_FaceRecognizerServicer_to_server( 40 | Servicer(), 41 | server 42 | ) 43 | 44 | server.add_insecure_port('[::]:' + os.environ['GRPC_PORT']) 45 | 46 | server.start() 47 | 48 | try: 49 | while True: 50 | time.sleep(60 * 60 * 24) 51 | except KeyboardInterrupt: 52 | server.stop(0) 53 | 54 | 55 | if __name__ == '__main__': 56 | prometheus_client.start_http_server(int(os.environ['METRICS_PORT'])) 57 | serve() 58 | -------------------------------------------------------------------------------- /db/README.md: -------------------------------------------------------------------------------- 1 | # postgres database 2 | 3 | We are using [POSTGRESQL](https://www.postgresql.org/) as the store for the raw scraped data from the various data sources.
4 | The schemas are quite similar to the scraped data structures. 5 | 6 | **Table of Contents** 7 | 8 | - [Instagram](#instagram) 9 | - [Remarks](#remarks) 10 | - [Twitter](#twitter) 11 | - [Debezium](#debezium) 12 | 13 | ## [Instagram](https://www.instagram.com/) 14 | 15 | This database is the more sophisticated one and is running in production. 16 | 17 | ![insta_schema](../docs/insta_schema.png) 18 | 19 | ### Remarks 20 | 21 | - `internal_picture_url` is pointing to the downloaded picture on S3 22 | 23 | ## Twitter 24 | 25 | This database is not in production yet and at the moment only dumps the tweaked scraped data. 26 | 27 | ![twitter_schema](../docs/twitter_schema.png) 28 | 29 | ## Debezium 30 | 31 | The [debezium](https://github.com/debezium/debezium) connector generates a change stream from all change events in postgres (`read`, `create`, `update`, `delete`) and writes them into a kafka-topic `"postgres.public."` 32 | 33 | To read from this stream you can: 34 | 35 | - get [`kafkacat`](https://github.com/edenhill/kafkacat) 36 | - inspect the topic list in kafka: 37 | ```bash 38 | $ kafkacat -L -b my-kafka | grep 'topic "postgres' 39 | ``` 40 | - consume a topic with 41 | ```bash 42 | $ kafkacat -b my-kafka -t 43 | ``` 44 | 45 | The messages are quite verbose, since they include their own schema description. The most interesting part is the `value.payload`: 46 | 47 | ```bash 48 | $ kafkacat -b my-kafka -topic postgres.public.users | jq '.value | fromjson | .payload'` 49 | ``` 50 | -------------------------------------------------------------------------------- /faces/recognizer_pb2_grpc.py: -------------------------------------------------------------------------------- 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 2 | import grpc 3 | 4 | import recognizer_pb2 as recognizer__pb2 5 | 6 | 7 | class FaceRecognizerStub(object): 8 | # missing associated documentation comment in .proto file 9 | pass 10 | 11 | def __init__(self, channel): 12 | """Constructor. 13 | 14 | Args: 15 | channel: A grpc.Channel. 16 | """ 17 | self.RecognizeFaces = channel.unary_unary( 18 | '/proto.FaceRecognizer/RecognizeFaces', 19 | request_serializer=recognizer__pb2.RecognizeRequest.SerializeToString, 20 | response_deserializer=recognizer__pb2.RegognizeResponse.FromString, 21 | ) 22 | 23 | 24 | class FaceRecognizerServicer(object): 25 | # missing associated documentation comment in .proto file 26 | pass 27 | 28 | def RecognizeFaces(self, request, context): 29 | # missing associated documentation comment in .proto file 30 | pass 31 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 32 | context.set_details('Method not implemented!') 33 | raise NotImplementedError('Method not implemented!') 34 | 35 | 36 | def add_FaceRecognizerServicer_to_server(servicer, server): 37 | rpc_method_handlers = { 38 | 'RecognizeFaces': grpc.unary_unary_rpc_method_handler( 39 | servicer.RecognizeFaces, 40 | request_deserializer=recognizer__pb2.RecognizeRequest.FromString, 41 | response_serializer=recognizer__pb2.RegognizeResponse.SerializeToString, 42 | ), 43 | } 44 | generic_handler = grpc.method_handlers_generic_handler( 45 | 'proto.FaceRecognizer', rpc_method_handlers) 46 | server.add_generic_rpc_handlers((generic_handler,)) 47 | -------------------------------------------------------------------------------- /http_header-generator/generator.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "encoding/json" 5 | "io/ioutil" 6 | "math/rand" 7 | "net/http" 8 | "time" 9 | ) 10 | 11 | // HTTPHeaderGenerator generates headers for http requests for scraping 12 | type HTTPHeaderGenerator struct { 13 | browserAgent browserAgent 14 | } 15 | 16 | // New returns an initialized HTTPHeaderGenerator 17 | func New() *HTTPHeaderGenerator { 18 | generator := &HTTPHeaderGenerator{} 19 | data, err := ioutil.ReadFile("http_header-generator/useragents.json") 20 | if err != nil { 21 | panic(err) 22 | } 23 | var userAgent browserAgent 24 | err = json.Unmarshal(data, &userAgent) 25 | 26 | if err != nil { 27 | panic(err) 28 | } 29 | generator.browserAgent = userAgent 30 | return generator 31 | } 32 | 33 | type browserAgent []struct { 34 | UserAgents string `json:"useragent"` 35 | } 36 | 37 | // AddHeaders ads the generated headers to the request headers 38 | func (h *HTTPHeaderGenerator) AddHeaders(header *http.Header) { 39 | header.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3") 40 | header.Add("Accept-Charset", "utf-8") 41 | header.Add("Accept-Language", "de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7") 42 | header.Add("Cache-Control", "no-cache") 43 | header.Add("Content-Type", "application/json; charset=utf-8") 44 | header.Add("User-Agent", h.GetRandomUserAgent()) 45 | } 46 | 47 | // GetRandomUserAgent returns a random user agent 48 | func (h *HTTPHeaderGenerator) GetRandomUserAgent() string { 49 | rand.Seed(time.Now().UnixNano()) 50 | randomNumber := rand.Intn(len(h.browserAgent)) 51 | return h.browserAgent[randomNumber].UserAgents 52 | } 53 | -------------------------------------------------------------------------------- /api/envoy-proxy/envoy.yaml: -------------------------------------------------------------------------------- 1 | admin: 2 | access_log_path: /tmp/admin_access.log 3 | address: 4 | socket_address: { address: 0.0.0.0, port_value: 9901 } 5 | 6 | static_resources: 7 | listeners: 8 | - name: listener_0 9 | address: 10 | socket_address: { address: 0.0.0.0, port_value: 8080 } 11 | filter_chains: 12 | - filters: 13 | - name: envoy.http_connection_manager 14 | config: 15 | codec_type: auto 16 | stat_prefix: ingress_http 17 | route_config: 18 | name: local_route 19 | virtual_hosts: 20 | - name: local_service 21 | domains: ["*"] 22 | routes: 23 | - match: { prefix: "/" } 24 | route: 25 | cluster: user_search_service 26 | max_grpc_timeout: 0s 27 | cors: 28 | allow_origin: 29 | - "*" 30 | allow_methods: GET, PUT, DELETE, POST, OPTIONS 31 | allow_headers: keep-alive,user-agent,cache-control,content-type,content-transfer-encoding,custom-header-1,x-accept-content-transfer-encoding,x-accept-response-streaming,x-user-agent,x-grpc-web,grpc-timeout 32 | max_age: "1728000" 33 | expose_headers: custom-header-1,grpc-status,grpc-message 34 | http_filters: 35 | - name: envoy.grpc_web 36 | - name: envoy.cors 37 | - name: envoy.router 38 | clusters: 39 | - name: user_search_service 40 | connect_timeout: 0.25s 41 | type: logical_dns 42 | http2_protocol_options: {} 43 | lb_policy: round_robin 44 | hosts: [{ socket_address: { address: grpc-server, port_value: 10000 }}] 45 | -------------------------------------------------------------------------------- /insta/scraper/likes/insta-model.go: -------------------------------------------------------------------------------- 1 | package insta_likes_scraper 2 | 3 | type InstaPostLikes struct { 4 | Data struct { 5 | ShortcodeMedia struct { 6 | ID string `json:"id"` 7 | Shortcode string `json:"shortcode"` 8 | EdgeLikedBy struct { 9 | Count int `json:"count"` 10 | PageInfo struct { 11 | HasNextPage bool `json:"has_next_page"` 12 | EndCursor string `json:"end_cursor"` 13 | } `json:"page_info"` 14 | Edges []struct { 15 | Node struct { 16 | ID string `json:"id"` 17 | Username string `json:"username"` 18 | FullName string `json:"full_name"` 19 | ProfilePicURL string `json:"profile_pic_url"` 20 | IsPrivate bool `json:"is_private"` 21 | IsVerified bool `json:"is_verified"` 22 | FollowedByViewer bool `json:"followed_by_viewer"` 23 | RequestedByViewer bool `json:"requested_by_viewer"` 24 | Reel struct { 25 | ID string `json:"id"` 26 | ExpiringAt int `json:"expiring_at"` 27 | HasPrideMedia bool `json:"has_pride_media"` 28 | LatestReelMedia interface{} `json:"latest_reel_media"` 29 | Seen interface{} `json:"seen"` 30 | Owner struct { 31 | Typename string `json:"__typename"` 32 | ID string `json:"id"` 33 | ProfilePicURL string `json:"profile_pic_url"` 34 | Username string `json:"username"` 35 | } `json:"owner"` 36 | } `json:"reel"` 37 | } `json:"node"` 38 | } `json:"edges"` 39 | } `json:"edge_liked_by"` 40 | } `json:"shortcode_media"` 41 | } `json:"data"` 42 | Status string `json:"status"` 43 | } 44 | -------------------------------------------------------------------------------- /frontend/src/index.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import ReactDOM from "react-dom"; 3 | import "./index.css"; 4 | import { Route, Link, BrowserRouter as Router, Switch } from "react-router-dom"; 5 | import App from "./App"; 6 | import { BrowserRouter } from "react-router-dom"; 7 | import Result from "./components/Result"; 8 | import Dashboard from "./pages/Dashboard"; 9 | import Notfound from "./notfound"; 10 | import FlowWrapper from "./components/FlowWrapper"; 11 | import Popup from "./components/Popup"; 12 | import Greeting from "./pages/Greeting.jsx"; 13 | import Network from "./pages/Network.jsx"; 14 | import Endscreen from "./pages/endscreen"; 15 | import SearchProfile from "./pages/SearchProfile"; 16 | import ExampleProfileSelection from "./pages/ExampleProfileSelection"; 17 | 18 | const root = document.getElementById("root"); 19 | 20 | ReactDOM.render( 21 | 22 |
23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 38 | 39 | 40 |
41 |
, 42 | root 43 | ); 44 | -------------------------------------------------------------------------------- /elastic/models/face.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "encoding/base64" 5 | "encoding/binary" 6 | "encoding/json" 7 | "math" 8 | 9 | "github.com/codeuniversity/smag-mvp/insta/models" 10 | ) 11 | 12 | // FaceDoc is the type that is used to store a face in elasticsearch 13 | type FaceDoc struct { 14 | PostID int `json:"post_id"` 15 | X int `json:"x"` 16 | Y int `json:"y"` 17 | Width int `json:"width"` 18 | Height int `json:"height"` 19 | EncodingVector string `json:"encoding_vector"` 20 | } 21 | 22 | // FaceDocFromFaceData returns a FaceDoc with an encoded `EncodingVector` given a faceData model 23 | func FaceDocFromFaceData(faceData *models.FaceData) (*FaceDoc, error) { 24 | var encodingString string 25 | err := json.Unmarshal(faceData.Encoding.RawMessage, &encodingString) 26 | if err != nil { 27 | return nil, err 28 | } 29 | encoding := []float32{} 30 | err = json.Unmarshal([]byte(encodingString), &encoding) 31 | if err != nil { 32 | return nil, err 33 | } 34 | return &FaceDoc{ 35 | PostID: faceData.PostID, 36 | X: faceData.X, 37 | Y: faceData.Y, 38 | Width: faceData.Width, 39 | Height: faceData.Height, 40 | EncodingVector: EncodedVector(encoding), 41 | }, nil 42 | } 43 | 44 | // EncodedVector for the given encoding, used for searching and looking up faces in elastic search 45 | func EncodedVector(encoding []float32) string { 46 | bytes := make([]byte, 0, 4*len(encoding)) 47 | for _, a := range encoding { 48 | bits := math.Float32bits(a) 49 | b := make([]byte, 4) 50 | binary.BigEndian.PutUint32(b, bits) 51 | bytes = append(bytes, b...) 52 | } 53 | 54 | encoded := base64.StdEncoding.EncodeToString(bytes) 55 | return encoded 56 | } 57 | -------------------------------------------------------------------------------- /faces/recognizer.py: -------------------------------------------------------------------------------- 1 | import face_recognition 2 | import tempfile 3 | import os 4 | import requests 5 | import random 6 | import string 7 | 8 | 9 | def recognize(url): 10 | image = download_and_read_image(url) 11 | if image is None: 12 | return [] 13 | 14 | locations = face_recognition.face_locations(image) 15 | encodings = face_recognition.face_encodings( 16 | image, known_face_locations=locations) 17 | 18 | img_height = len(image) 19 | img_width = len(image[0]) 20 | 21 | faces = [] 22 | for index, location in enumerate(locations): 23 | encoding = encodings[index] 24 | (top, right, bottom, left) = location 25 | x = left 26 | y = top 27 | width = right - left 28 | height = bottom - top 29 | 30 | area = { 31 | "x": x, 32 | "y": y, 33 | "width": width, 34 | "height": height, 35 | } 36 | faces.append({ 37 | "area": area, 38 | "encoding": encoding, 39 | }) 40 | return faces 41 | 42 | 43 | def download_and_read_image(url): 44 | file_name = random_string() + ".jpg" 45 | with open(file_name, 'wb') as handle: 46 | response = requests.get(url, stream=True) 47 | 48 | if not response.ok: 49 | print("failed to get file with name: " + file_name) 50 | return None 51 | 52 | for block in response.iter_content(1024): 53 | if not block: 54 | break 55 | 56 | handle.write(block) 57 | image = face_recognition.load_image_file(file_name) 58 | 59 | os.remove(file_name) 60 | 61 | return image 62 | 63 | 64 | def random_string(n=12): 65 | return ''.join(random.choices(string.ascii_uppercase + string.digits, k=n)) 66 | -------------------------------------------------------------------------------- /frontend/src/pages/SearchProfile.jsx: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | import Form from "../components/Form"; 3 | import H2 from "../components/H2"; 4 | import Button from "../components/Button"; 5 | import Result from "../components/Result"; 6 | import { 7 | User, 8 | UserNameRequest, 9 | UserIdRequest, 10 | Post, 11 | UserIdResponse, 12 | UserSearchResponse 13 | } from "../protofiles/usersearch_pb.js"; 14 | import { UserSearchServiceClient } from "../protofiles/usersearch_grpc_web_pb"; 15 | import { withRouter } from "react-router"; 16 | import PropTypes from "prop-types"; 17 | 18 | // eslint-disable-next-line 19 | 20 | class SearchProfile extends Component { 21 | handleSubmit = async userName => { 22 | const requestUser = new UserNameRequest(); 23 | requestUser.setUserName(userName); 24 | const response = await this.props.apiClient.getUserWithUsername( 25 | requestUser 26 | ); 27 | const user = response.toObject(); 28 | const profile = { facesList: [], weight: 0, user: user }; 29 | this.props.onProfileSelect(profile); 30 | }; 31 | render() { 32 | return ( 33 |
34 |
35 |

We couldn't find you. Please enter your instagram username.

36 |
37 |
38 | 41 |
42 |
43 | ); 44 | } 45 | } 46 | 47 | SearchProfile.propTypes = { 48 | history: PropTypes.shape({ 49 | push: PropTypes.shape({ 50 | pathname: PropTypes.string, 51 | state: PropTypes.object 52 | }) 53 | }) 54 | }; 55 | 56 | export default withRouter(SearchProfile); 57 | -------------------------------------------------------------------------------- /nlp/frequency-analyzer/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "io/ioutil" 7 | "log" 8 | "os" 9 | "strconv" 10 | 11 | analyzer "github.com/codeuniversity/smag-mvp/nlp/frequency-analyzer" 12 | ) 13 | 14 | func main() { 15 | if len(os.Args) != 2 { 16 | log.Fatal("Num of args isn't 1.\nUsage: go run main.go ") 17 | } 18 | userID, err := strconv.Atoi(os.Args[1]) 19 | if err != nil { 20 | log.Fatalf("Unable to convert user_id=%+v to integer.\nsage: go run main.go ", userID) 21 | } 22 | 23 | a := analyzer.New([]string{"http://127.0.0.1:9200"}) 24 | log.Printf("a=%+v", a) 25 | 26 | // TODO: load cities.json 27 | jsonFile, err := os.Open("nlp/frequency-analyzer/cities.json") 28 | defer jsonFile.Close() 29 | if err != nil { 30 | log.Fatalln(err) 31 | } 32 | byteValue, err := ioutil.ReadAll(jsonFile) 33 | var cityMap map[string][]string 34 | if err := json.Unmarshal(byteValue, &cityMap); err != nil { 35 | panic(err) 36 | } 37 | log.Printf("cityMap=%+v", cityMap) 38 | 39 | foundCities := make(map[string]bool) 40 | for city, cityTerms := range cityMap { 41 | foundTerms, err := a.MatchTermsForUser(userID, cityTerms) 42 | if err != nil { 43 | panic(err) 44 | } 45 | log.Printf("city=%v \t-> foundTerms=%+v", city, foundTerms) 46 | // check if there are results for city 47 | if len(foundTerms) > 0 { 48 | foundCities[city] = true 49 | } else { 50 | foundCities[city] = false 51 | } 52 | } 53 | log.Printf("foundCities=%+v", foundCities) 54 | 55 | res := make([]string, 0, len(foundCities)) 56 | log.Printf("Could identify following cities for user=%v: {", userID) 57 | for city, found := range foundCities { 58 | if found == true { 59 | res = append(res, city) 60 | fmt.Printf(" * %v\n", city) 61 | } 62 | } 63 | fmt.Println("}") 64 | } 65 | -------------------------------------------------------------------------------- /frontend/src/pages/ExampleProfileSelection.jsx: -------------------------------------------------------------------------------- 1 | import React, { Component, useState } from "react"; 2 | import { withRouter, history } from "react-router"; 3 | import Button from "../components/Button"; 4 | import H1 from "../components/H1"; 5 | import H2 from "../components/H2"; 6 | import { 7 | User, 8 | UserNameRequest, 9 | UserIdRequest, 10 | Post, 11 | UserIdResponse, 12 | UserSearchResponse 13 | } from "../protofiles/usersearch_pb.js"; 14 | import { UserSearchServiceClient } from "../protofiles/usersearch_grpc_web_pb"; 15 | async function fetchExampleProfile(apiClient) { 16 | const requestUser = new UserNameRequest(); 17 | 18 | requestUser.setUserName("codeuniversity"); 19 | const response = await apiClient.getUserWithUsername(requestUser); 20 | const user = response.toObject(); 21 | return user; 22 | } 23 | function ExampleProfileSelection(props) { 24 | return ( 25 |
26 |
27 |

We couldn't find any data of you.

28 |
29 |

30 | Would you like to continue the exhibition with the profile of CODE? 31 |

32 |
33 |
34 | 35 |
36 |
37 | 47 |
48 |
49 |
50 |
51 | ); 52 | } 53 | 54 | export default ExampleProfileSelection; 55 | -------------------------------------------------------------------------------- /frontend/src/App.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | import Form from "./components/Form"; 3 | import H1 from "./components/H1"; 4 | import Result from "./components/Result"; 5 | import "./index.css"; 6 | import { 7 | User, 8 | UserNameRequest, 9 | UserIdRequest, 10 | Post, 11 | UserIdResponse, 12 | UserSearchResponse 13 | } from "./protofiles/usersearch_pb.js"; 14 | import { UserSearchServiceClient } from "./protofiles/usersearch_grpc_web_pb"; 15 | import { withRouter } from "react-router"; 16 | import PropTypes from "prop-types"; 17 | 18 | // eslint-disable-next-line 19 | 20 | class App extends Component { 21 | handleSubmit = userName => { 22 | const userSearch = new UserSearchServiceClient("http://localhost:4000"); 23 | 24 | const requestUser = new UserNameRequest(); 25 | 26 | requestUser.setUserName(userName); 27 | userSearch.getUserWithUsername(requestUser, {}, (err, response) => { 28 | if (err) { 29 | console.log(err); 30 | return; 31 | } 32 | const user = { 33 | id: response.getId(), 34 | bio: response.getBio(), 35 | avatarurl: response.getAvatarUrl(), 36 | username: response.getUserName(), 37 | realname: response.getRealName() 38 | }; 39 | 40 | this.props.history.push({ 41 | pathname: "/result", 42 | state: { user } 43 | }); 44 | }); 45 | }; 46 | render() { 47 | return ( 48 |
49 |
50 |

Find out your public digital identity!

51 | 52 |
53 |
54 | ); 55 | } 56 | } 57 | 58 | App.propTypes = { 59 | history: PropTypes.shape({ 60 | push: PropTypes.shape({ 61 | pathname: PropTypes.string, 62 | state: PropTypes.object 63 | }) 64 | }) 65 | }; 66 | 67 | export default withRouter(App); 68 | -------------------------------------------------------------------------------- /frontend/src/components/Start.js: -------------------------------------------------------------------------------- 1 | import React, { useState } from "react"; 2 | import { CameraFeed } from "./camera-feed"; 3 | import H1 from "./H1"; 4 | import H2 from "./H2"; 5 | import { FaceSearchRequest } from "../protofiles/usersearch_pb"; 6 | import IGPost from "./IGPost"; 7 | 8 | function findFacesInImage(apiClient, onFindFaces) { 9 | return async file => { 10 | const reader = new FileReader(); 11 | reader.onloadend = async () => { 12 | const dataUrl = reader.result; 13 | const base64Data = dataUrl.split(",")[1]; 14 | 15 | const request = new FaceSearchRequest(); 16 | request.setBase64encodedpicture(base64Data); 17 | const response = await apiClient.searchSimilarFaces(request); 18 | const faces = response.getFacesList().map(protoFace => ({ 19 | postId: protoFace.getPostId(), 20 | x: protoFace.getX(), 21 | y: protoFace.getY(), 22 | width: protoFace.getWidth(), 23 | height: protoFace.getHeight(), 24 | fullImageSrc: protoFace.getFullImageSrc() 25 | })); 26 | 27 | onFindFaces(faces); 28 | }; 29 | reader.readAsDataURL(file); 30 | }; 31 | } 32 | 33 | function Start({ apiClient, faceHits, addFaceHits, progress }) { 34 | const onFileSubmit = findFacesInImage(apiClient, addFaceHits); 35 | 36 | return ( 37 |
38 |
39 |

40 | Are you aware that wherever you are recorded, 41 |
your identity can be found? 42 |

43 | 44 |
53 |
54 |
55 |
56 | ); 57 | } 58 | 59 | export default Start; 60 | -------------------------------------------------------------------------------- /imgproxy/urlbuilder.go: -------------------------------------------------------------------------------- 1 | package imgproxy 2 | 3 | import ( 4 | "crypto/hmac" 5 | "crypto/sha256" 6 | "encoding/base64" 7 | "encoding/hex" 8 | "errors" 9 | "fmt" 10 | ) 11 | 12 | // URLBuilder simplifies constructing urls for the imgproxy 13 | type URLBuilder struct { 14 | keyBin []byte 15 | saltBin []byte 16 | proxyAddress string 17 | } 18 | 19 | // New returns a URLBuilder or an error if the salt or key is not hex-encoded 20 | func New(proxyAddress, key, salt string) (*URLBuilder, error) { 21 | var keyBin, saltBin []byte 22 | var err error 23 | 24 | if keyBin, err = hex.DecodeString(key); err != nil { 25 | return nil, errors.New("Key expected to be hex-encoded string") 26 | } 27 | 28 | if saltBin, err = hex.DecodeString(salt); err != nil { 29 | return nil, errors.New("Salt expected to be hex-encoded string") 30 | } 31 | 32 | return &URLBuilder{ 33 | keyBin: keyBin, 34 | saltBin: saltBin, 35 | proxyAddress: proxyAddress, 36 | }, nil 37 | } 38 | 39 | // GetCropURL returns a url that instructs the imgproxy to crop out the given cordinates of the image 40 | // with a sourceURL in the form "s3:///" the image proxy will download the image from s3 41 | func (b *URLBuilder) GetCropURL(x, y, width, height int, sourceURL string) string { 42 | encodedURL := base64.RawURLEncoding.EncodeToString([]byte(sourceURL)) 43 | gravity := fmt.Sprintf("nowe:%d:%d", x, y) 44 | extension := "jpg" 45 | path := fmt.Sprintf("/crop:%d:%d:%s/%s.%s", width, height, gravity, encodedURL, extension) 46 | 47 | mac := hmac.New(sha256.New, b.keyBin) 48 | mac.Write(b.saltBin) 49 | mac.Write([]byte(path)) 50 | signature := base64.RawURLEncoding.EncodeToString(mac.Sum(nil))[:32] 51 | 52 | return fmt.Sprintf("http://%s/%s%s", b.proxyAddress, signature, path) 53 | } 54 | 55 | // GetS3Url returns a url in the form "s3:///" 56 | func (b *URLBuilder) GetS3Url(bucketName, path string) string { 57 | return fmt.Sprintf("s3://%s/%s", bucketName, path) 58 | } 59 | -------------------------------------------------------------------------------- /twitter/scraper/twitterscraper/following_scraper.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import twint 4 | 5 | from .scraper_manager import ScraperManager 6 | from .utils import get_conf, ShallowTwitterUser 7 | 8 | 9 | def scrape(user_name: str) -> twint.user.user: 10 | user = ShallowTwitterUser(user_name) 11 | 12 | conf = get_conf(user_name) 13 | user.following_list = scrape_follows_list(twint.run.Following, conf) 14 | 15 | return user 16 | 17 | 18 | def scrape_follows_list(func, conf: twint.Config) -> list: 19 | func(conf) 20 | 21 | # if we only scrape user names (set conf.User_full = False) user names are in follows_list 22 | # if we scrape profiles of follows (set conf.User_full = True) user objs are in users_list 23 | ret = [] 24 | ret.extend(twint.output.follows_list) 25 | ret.extend(twint.output.users_list) 26 | twint.output.follows_list = [] 27 | twint.output.users_list = [] 28 | return ret 29 | 30 | 31 | class FollowingScraper(ScraperManager): 32 | name = "following_scraper" 33 | 34 | @staticmethod 35 | def scrape(user_name: str): 36 | logging.info(f"scrape user {user_name}s followings") 37 | user = scrape(user_name) 38 | return user 39 | 40 | 41 | if __name__ == "__main__": 42 | import os 43 | 44 | logging.basicConfig( 45 | format="%(asctime)s.%(msecs)03d - %(module)s - %(levelname)s - %(message)s", 46 | datefmt="%H:%M:%S", 47 | level=logging.INFO, 48 | ) 49 | 50 | fetch_topic = os.getenv("KAFKA_FETCH_TOPIC", "user_names") 51 | insert_topic = os.getenv("KAFKA_INSERT_TOPIC", "users_scraped") 52 | kafka_consumer_group = os.getenv("KAFKA_CONSUMER_GROUP", "following_scraper") 53 | kafka_address = os.getenv("KAFKA_ADDRESS", "localhost:9092") 54 | 55 | following_scraper = FollowingScraper( 56 | insert_topic=insert_topic, 57 | fetch_topic=fetch_topic, 58 | kafka_consumer_group=kafka_consumer_group, 59 | kafka_address=kafka_address, 60 | ) 61 | following_scraper.run() 62 | -------------------------------------------------------------------------------- /twitter/scraper/twitterscraper/followers_scraper.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import twint 4 | 5 | from .scraper_manager import ScraperManager 6 | from .utils import get_conf, ShallowTwitterUser 7 | 8 | 9 | def scrape(user_name: str) -> twint.user.user: 10 | user = ShallowTwitterUser(user_name) 11 | 12 | conf = get_conf(user_name) 13 | user.followers_list = scrape_follows_list(twint.run.Followers, conf) 14 | 15 | return user 16 | 17 | 18 | def scrape_follows_list(func, conf: twint.Config) -> list: 19 | func(conf) 20 | 21 | # if we only scrape user names (set conf.User_full = False) user names are in follows_list 22 | # if we scrape profiles of follows (set conf.User_full = True) user objs are in users_list 23 | ret = [] 24 | ret.extend(twint.output.follows_list) 25 | ret.extend(twint.output.users_list) 26 | twint.output.follows_list = [] 27 | twint.output.users_list = [] 28 | return ret 29 | 30 | 31 | class FollowersScraper(ScraperManager): 32 | name = "follower_scraper" 33 | 34 | @staticmethod 35 | def scrape(user_name: str): 36 | logging.info(f"scrape user {user_name}s followers") 37 | user = scrape(user_name) 38 | return user 39 | 40 | 41 | if __name__ == "__main__": 42 | import os 43 | 44 | logging.basicConfig( 45 | format="%(asctime)s.%(msecs)03d - %(module)s - %(levelname)s - %(message)s", 46 | datefmt="%H:%M:%S", 47 | level=logging.INFO, 48 | ) 49 | 50 | fetch_topic = os.getenv("KAFKA_FETCH_TOPIC", "user_names") 51 | insert_topic = os.getenv("KAFKA_INSERT_TOPIC", "users_scraped") 52 | kafka_consumer_group = os.getenv( 53 | "KAFKA_CONSUMER_GROUP", "followers_scraper") 54 | kafka_address = os.getenv("KAFKA_ADDRESS", "localhost:9092") 55 | 56 | follower_scraper = FollowersScraper( 57 | insert_topic=insert_topic, 58 | fetch_topic=fetch_topic, 59 | kafka_consumer_group=kafka_consumer_group, 60 | kafka_address=kafka_address, 61 | ) 62 | follower_scraper.run() 63 | -------------------------------------------------------------------------------- /frontend/src/components/FaceHitAnimation.js: -------------------------------------------------------------------------------- 1 | import React, { useState, useEffect } from "react"; 2 | import "./FaceHitAnimation.css"; 3 | 4 | function FaceHitAnimation(props) { 5 | // hack to freeze faceHits on first mount 6 | const [faceHits, setFaceHits] = useState({}); 7 | useEffect(() => { 8 | setFaceHits(props.faceHits); 9 | }, []); 10 | 11 | const images = Object.entries(faceHits) 12 | .map(([postId, faces]) => ({ 13 | postId, 14 | weight: faces.length, 15 | imageSrc: faces[0].fullImageSrc 16 | })) 17 | .sort((a, b) => b.weigth - a.weight) 18 | .reverse(); 19 | 20 | const [imageCoordinates, setImageCoordinates] = useState([]); 21 | 22 | useEffect(() => { 23 | if (imageCoordinates.length >= images.length) { 24 | const timeoutId = setTimeout(props.onAnimationFinished, 2000); 25 | return () => clearTimeout(timeoutId); 26 | } 27 | 28 | const intervalId = setInterval(() => { 29 | setImageCoordinates(prevCoordinates => [ 30 | ...prevCoordinates, 31 | { top: randomCoordinate(), left: randomCoordinate() } 32 | ]); 33 | }, 250); 34 | return () => clearInterval(intervalId); 35 | }, [imageCoordinates.length, images.length]); 36 | 37 | return ( 38 | <> 39 | {imageCoordinates.map((coord, index) => ( 40 | 47 | ))} 48 | 49 | ); 50 | } 51 | 52 | export default FaceHitAnimation; 53 | 54 | function BackgroundImage({ imageSrc, top, left, last }) { 55 | if (last) { 56 | return ; 57 | } 58 | 59 | return ( 60 | 65 | ); 66 | } 67 | 68 | function randomCoordinate() { 69 | return Math.round(Math.random() * 100); 70 | } 71 | -------------------------------------------------------------------------------- /worker/worker.go: -------------------------------------------------------------------------------- 1 | package worker 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "sync" 7 | "time" 8 | 9 | "github.com/codeuniversity/smag-mvp/service" 10 | ) 11 | 12 | // Worker abstracts away all the executor lifecycle hooks, exposing a much more high level api 13 | type Worker struct { 14 | executor *service.Executor 15 | name string 16 | step func() error 17 | shutdownHooks []shutdownHook 18 | 19 | stopTimeout time.Duration 20 | 21 | shutdownOnce sync.Once 22 | } 23 | 24 | // Start tells the worker to go and do work in another goroutine. 25 | // Returns a wait func that blocks until the worker is closed or encountered an error it can't handle 26 | func (w *Worker) Start() (wait func()) { 27 | go w.work() 28 | 29 | return w.executor.WaitUntilClosed 30 | } 31 | 32 | // Close Worker work 33 | func (w *Worker) Close() { 34 | w.shutdownOnce.Do(w.shutdown) 35 | } 36 | 37 | func (w *Worker) shutdown() { 38 | log.Println("stopping", w.name) 39 | w.executor.Stop() 40 | log.Println("waiting for work to stop") 41 | w.executor.WaitUntilStopped(w.stopTimeout) 42 | 43 | log.Println("calling shutdown hooks") 44 | for _, hook := range w.shutdownHooks { 45 | log.Println("shutting down", hook.name) 46 | err := w.callHookWithRecover(hook.f) 47 | if err != nil { 48 | log.Println("encountered error on shutdown: ", err) 49 | } 50 | } 51 | 52 | log.Println(w.name, "is shut down") 53 | w.executor.MarkAsClosed() 54 | } 55 | 56 | func (w *Worker) work() { 57 | defer w.Close() 58 | 59 | defer func() { 60 | log.Println(w.name, "is done here") 61 | w.executor.MarkAsStopped() 62 | }() 63 | 64 | log.Println("starting", w.name) 65 | 66 | for w.executor.IsRunning() { 67 | err := w.callHookWithRecover(w.step) 68 | if err != nil { 69 | log.Println("encountered error while working: ", err) 70 | return 71 | } 72 | } 73 | } 74 | 75 | func (w *Worker) callHookWithRecover(hook func() error) (err error) { 76 | defer func() { 77 | if r := recover(); r != nil { 78 | err = fmt.Errorf("recovered from panic: %s", r) 79 | } 80 | }() 81 | 82 | err = hook() 83 | return 84 | } 85 | -------------------------------------------------------------------------------- /insta/filter/post_pictures/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | 6 | "github.com/codeuniversity/smag-mvp/insta/models" 7 | "github.com/codeuniversity/smag-mvp/kafka/changestream" 8 | "github.com/codeuniversity/smag-mvp/service" 9 | "github.com/codeuniversity/smag-mvp/utils" 10 | 11 | "github.com/segmentio/kafka-go" 12 | ) 13 | 14 | func main() { 15 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "my-kafka:9092") 16 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID") 17 | changesTopic := utils.GetStringFromEnvWithDefault("KAFKA_CHANGE_TOPIC", "postgres.public.posts") 18 | downloadTopic := utils.GetStringFromEnvWithDefault("KAFKA_PICTURE_DOWNLOADS_TOPIC", "insta_post_picture_download_jobs") 19 | 20 | f := changestream.NewFilter(kafkaAddress, groupID, changesTopic, downloadTopic, filterChange) 21 | 22 | service.CloseOnSignal(f) 23 | waitUntilClosed := f.Start() 24 | 25 | waitUntilClosed() 26 | } 27 | 28 | type post struct { 29 | ID int `json:"id"` 30 | PictureURL string `json:"picture_url"` 31 | } 32 | 33 | func filterChange(m *changestream.ChangeMessage) ([]kafka.Message, error) { 34 | if !(m.Payload.Op == "c" || m.Payload.Op == "u") { 35 | return nil, nil 36 | } 37 | 38 | currentVersion := &post{} 39 | err := json.Unmarshal(m.Payload.After, currentVersion) 40 | if err != nil { 41 | return nil, err 42 | } 43 | 44 | if currentVersion.PictureURL == "" { 45 | return nil, nil 46 | } 47 | 48 | if m.Payload.Op == "c" { 49 | return constructDownloadJobMessage(currentVersion) 50 | } 51 | 52 | previousVersion := &post{} 53 | err = json.Unmarshal(m.Payload.Before, previousVersion) 54 | if err != nil { 55 | return nil, err 56 | } 57 | 58 | if currentVersion.PictureURL != previousVersion.PictureURL { 59 | return constructDownloadJobMessage(currentVersion) 60 | } 61 | 62 | return nil, nil 63 | } 64 | 65 | func constructDownloadJobMessage(p *post) ([]kafka.Message, error) { 66 | job := &models.PostDownloadJob{ 67 | PostID: p.ID, 68 | PictureURL: p.PictureURL, 69 | } 70 | b, err := json.Marshal(job) 71 | if err != nil { 72 | return nil, err 73 | } 74 | 75 | return []kafka.Message{ 76 | {Value: b}, 77 | }, nil 78 | } 79 | -------------------------------------------------------------------------------- /worker/builder.go: -------------------------------------------------------------------------------- 1 | package worker 2 | 3 | import ( 4 | "errors" 5 | "time" 6 | 7 | "github.com/codeuniversity/smag-mvp/service" 8 | ) 9 | 10 | const ( 11 | defaultStopTimeout = 5 * time.Second 12 | ) 13 | 14 | // Builder is for configuring a worker 15 | type Builder struct { 16 | name string 17 | step func() error 18 | shutdownHooks []shutdownHook 19 | stopTimeout time.Duration 20 | } 21 | 22 | // WithName sets the worker name (required) 23 | func (b Builder) WithName(n string) Builder { 24 | b.name = n 25 | return b 26 | } 27 | 28 | // WithWorkStep sets the step func a worker should repeatedly call (required) 29 | func (b Builder) WithWorkStep(s func() error) Builder { 30 | b.step = s 31 | return b 32 | } 33 | 34 | // AddShutdownHook registers the hook to be called on shutdown of the worker 35 | func (b Builder) AddShutdownHook(hookName string, hook func() error) Builder { 36 | b.shutdownHooks = append(b.shutdownHooks, shutdownHook{ 37 | f: hook, 38 | name: hookName, 39 | }) 40 | 41 | return b 42 | } 43 | 44 | // WithStopTimeout changes the timeout used when stopping the worker loop. If not set, uses defaultStopTimeout 45 | func (b Builder) WithStopTimeout(t time.Duration) Builder { 46 | b.stopTimeout = t 47 | return b 48 | } 49 | 50 | // Build a Worker with the given configuration 51 | func (b Builder) Build() (*Worker, error) { 52 | if !b.valid() { 53 | return nil, errors.New("could not build worker: both name and work step have to be set") 54 | } 55 | w := &Worker{ 56 | executor: service.New(), 57 | name: b.name, 58 | step: b.step, 59 | shutdownHooks: b.shutdownHooks, 60 | stopTimeout: b.stopTimeout, 61 | } 62 | 63 | if w.stopTimeout == 0 { 64 | w.stopTimeout = defaultStopTimeout 65 | } 66 | 67 | return w, nil 68 | } 69 | 70 | // MustBuild a Worker with the given configuration. Panics if not all required config is given 71 | func (b Builder) MustBuild() *Worker { 72 | w, err := b.Build() 73 | 74 | if err != nil { 75 | panic(err) 76 | } 77 | 78 | return w 79 | } 80 | 81 | func (b Builder) valid() bool { 82 | return b.name != "" && b.step != nil 83 | } 84 | 85 | type shutdownHook struct { 86 | f func() error 87 | name string 88 | } 89 | -------------------------------------------------------------------------------- /frontend/public/background.js: -------------------------------------------------------------------------------- 1 | var typeface; 2 | 3 | var fontSize = 18; 4 | var streams = []; 5 | var fadeInterval = 1; 6 | 7 | function preload() { 8 | typeface = loadFont("Barlow-SemiBold.otf"); 9 | } 10 | 11 | function setup() { 12 | createCanvas(windowWidth, windowHeight); 13 | textSize(fontSize); 14 | 15 | var xstream = 0; 16 | for (var i = 0; i <= width / fontSize; i++) { 17 | var stream = new Stream(); 18 | stream.generateLetters(xstream, random(-windowHeight, windowHeight)); 19 | streams.push(stream); 20 | xstream += fontSize; 21 | } 22 | } 23 | 24 | function draw() { 25 | background(0, 200); 26 | noStroke(); 27 | 28 | streams.forEach(function(stream) { 29 | stream.render(); 30 | }); 31 | } 32 | 33 | function Letter(x, y, speed, first, opacity) { 34 | this.x = x; 35 | this.y = y; 36 | this.value; 37 | this.speed = speed; 38 | this.switch = round(random(10, 80)); 39 | this.first = first; 40 | this.opacity = opacity; 41 | 42 | this.RandomLetter = function() { 43 | if (frameCount % this.switch === 0) { 44 | this.value = String.fromCharCode(0x00 + round(random(48, 90))); 45 | } 46 | }; 47 | 48 | this.rain = function() { 49 | this.y = this.y >= height ? 0 : (this.y += this.speed); 50 | }; 51 | } 52 | 53 | function Stream() { 54 | this.letters = []; 55 | this.totalLetters = round(random(15, 30)); 56 | this.speed = random(2, 5); 57 | 58 | this.generateLetters = function(x, y) { 59 | var opacity = 255; 60 | var first = round(random(0, 4)) === 1; 61 | for (var i = 0; i <= this.totalLetters; i++) { 62 | letter = new Letter(x, y, this.speed, first, opacity); 63 | letter.RandomLetter(); 64 | this.letters.push(letter); 65 | opacity -= 255 / this.totalLetters / fadeInterval; 66 | y -= fontSize; 67 | first = false; 68 | } 69 | }; 70 | 71 | this.render = function() { 72 | this.letters.forEach(function(letter) { 73 | if (letter.first) { 74 | fill(150, 220, 255, letter.opacity); 75 | } else { 76 | fill(42, 159, 216, letter.opacity); 77 | } 78 | text(letter.value, letter.x, letter.y); 79 | letter.rain(); 80 | letter.RandomLetter(); 81 | }); 82 | }; 83 | } 84 | -------------------------------------------------------------------------------- /insta/filter/post_face-recon/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | 6 | "github.com/codeuniversity/smag-mvp/insta/models" 7 | "github.com/codeuniversity/smag-mvp/kafka/changestream" 8 | "github.com/codeuniversity/smag-mvp/service" 9 | "github.com/codeuniversity/smag-mvp/utils" 10 | 11 | "github.com/segmentio/kafka-go" 12 | ) 13 | 14 | func main() { 15 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "127.0.0.1:9092") 16 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID") 17 | changesTopic := utils.GetStringFromEnvWithDefault("KAFKA_CHANGE_TOPIC", "postgres.public.posts") 18 | downloadTopic := utils.GetStringFromEnvWithDefault("KAFKA_PICTURE_FACE_RECON_TOPIC", "insta_post_face_recon_jobs") 19 | 20 | f := changestream.NewFilter(kafkaAddress, groupID, changesTopic, downloadTopic, filterChange) 21 | 22 | service.CloseOnSignal(f) 23 | waitUntilClosed := f.Start() 24 | 25 | waitUntilClosed() 26 | } 27 | 28 | type post struct { 29 | ID int `json:"id"` 30 | InternalPictureURL string `json:"internal_picture_url"` 31 | } 32 | 33 | func filterChange(m *changestream.ChangeMessage) ([]kafka.Message, error) { 34 | if !(m.Payload.Op == "c" || m.Payload.Op == "u") { 35 | return nil, nil 36 | } 37 | 38 | currentVersion := &post{} 39 | err := json.Unmarshal(m.Payload.After, currentVersion) 40 | if err != nil { 41 | return nil, err 42 | } 43 | 44 | if m.Payload.Op == "c" { 45 | return constructDownloadJobMessage(currentVersion) 46 | } 47 | 48 | previousVersion := &post{} 49 | err = json.Unmarshal(m.Payload.Before, previousVersion) 50 | if err != nil { 51 | return nil, err 52 | } 53 | 54 | if currentVersion.InternalPictureURL != previousVersion.InternalPictureURL { 55 | return constructDownloadJobMessage(currentVersion) 56 | } 57 | 58 | return nil, nil 59 | } 60 | 61 | func constructDownloadJobMessage(p *post) ([]kafka.Message, error) { 62 | if p.InternalPictureURL == "" { 63 | return nil, nil 64 | } 65 | 66 | job := &models.PostDownloadJob{ 67 | PostID: p.ID, 68 | PictureURL: p.InternalPictureURL, 69 | } 70 | b, err := json.Marshal(job) 71 | if err != nil { 72 | return nil, err 73 | } 74 | 75 | return []kafka.Message{ 76 | {Value: b}, 77 | }, nil 78 | } 79 | -------------------------------------------------------------------------------- /insta/filter/post_pictures/filter_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/codeuniversity/smag-mvp/kafka/changestream" 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestFilter(t *testing.T) { 11 | validPayloadJSON := []byte("{\"id\":1,\"picture_url\":\"https://test.test\"}") 12 | preUpdatePayloadJSON := []byte("{\"id\":1,\"picture_url\":\"https://test2.test2\"}") 13 | invalidPayloadJSON := []byte("{\"id\":\"1\",\"picture_url\":\"https://test.test\"}") 14 | 15 | t.Run("create event with unmarshable json", func(t *testing.T) { 16 | //create test input 17 | changeMsg := &changestream.ChangeMessage{} 18 | changeMsg.Payload.Op = "c" 19 | changeMsg.Payload.After = validPayloadJSON 20 | 21 | kMessages, err := filterChange(changeMsg) 22 | 23 | expected := "{\"post_id\":1,\"picture_url\":\"https://test.test\"}" 24 | 25 | assert.Nil(t, err, "no error") 26 | assert.Equal(t, 1, len(kMessages)) 27 | assert.Equal(t, expected, string(kMessages[0].Value)) 28 | }) 29 | 30 | t.Run("update event with unmarshable json", func(t *testing.T) { 31 | //create test input 32 | changeMsg := &changestream.ChangeMessage{} 33 | changeMsg.Payload.Op = "u" 34 | changeMsg.Payload.Before = preUpdatePayloadJSON 35 | changeMsg.Payload.After = validPayloadJSON 36 | 37 | kMessages, err := filterChange(changeMsg) 38 | 39 | expected := "{\"post_id\":1,\"picture_url\":\"https://test.test\"}" 40 | 41 | assert.Nil(t, err, "no error") 42 | assert.Equal(t, 1, len(kMessages)) 43 | assert.Equal(t, expected, string(kMessages[0].Value)) 44 | }) 45 | 46 | t.Run("create event with not unmarshable json", func(t *testing.T) { 47 | //create test input 48 | changeMsg := &changestream.ChangeMessage{} 49 | changeMsg.Payload.Op = "c" 50 | changeMsg.Payload.After = invalidPayloadJSON 51 | 52 | kMessages, err := filterChange(changeMsg) 53 | 54 | assert.NotNil(t, err, "error occurs") 55 | assert.Nil(t, kMessages, "nil output") 56 | }) 57 | 58 | t.Run("ignored event", func(t *testing.T) { 59 | //create test input 60 | changeMsg := &changestream.ChangeMessage{} 61 | changeMsg.Payload.Op = "d" 62 | 63 | kMessages, err := filterChange(changeMsg) 64 | 65 | assert.Nil(t, err, "no error") 66 | assert.Nil(t, kMessages, "nil output") 67 | }) 68 | } 69 | -------------------------------------------------------------------------------- /insta/indexer/faces/insta_faces_indexer.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "strconv" 6 | 7 | "github.com/codeuniversity/smag-mvp/elastic" 8 | "github.com/codeuniversity/smag-mvp/elastic/indexer" 9 | esModels "github.com/codeuniversity/smag-mvp/elastic/models" 10 | "github.com/codeuniversity/smag-mvp/insta/models" 11 | "github.com/codeuniversity/smag-mvp/kafka/changestream" 12 | "github.com/codeuniversity/smag-mvp/service" 13 | "github.com/codeuniversity/smag-mvp/utils" 14 | ) 15 | 16 | func main() { 17 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "my-kafka:9092") 18 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID") 19 | bulkChunkSize := utils.GetNumberFromEnvWithDefault("BULK_CHUNK_SIZE", 10) 20 | changesTopic := utils.GetStringFromEnvWithDefault("KAFKA_CHANGE_TOPIC", "postgres.public.face_data") 21 | bulkFetchTimeoutSeconds := utils.GetNumberFromEnvWithDefault("BULK_FETCH_TIMEOUT_SECONDS", 5) 22 | esHosts := utils.GetMultipleStringsFromEnvWithDefault("ES_HOSTS", []string{"http://localhost:9200"}) 23 | 24 | i := indexer.New(esHosts, elastic.FacesIndex, elastic.FacesIndexMapping, kafkaAddress, changesTopic, groupID, indexFace, bulkChunkSize, bulkFetchTimeoutSeconds) 25 | 26 | service.CloseOnSignal(i) 27 | waitUntilDone := i.Start() 28 | 29 | waitUntilDone() 30 | } 31 | 32 | func indexFace(m *changestream.ChangeMessage) (*indexer.BulkIndexDoc, error) { 33 | 34 | switch m.Payload.Op { 35 | case "r", "u", "c": 36 | break 37 | default: 38 | return nil, nil 39 | } 40 | 41 | face := &models.FaceData{} 42 | err := json.Unmarshal(m.Payload.After, face) 43 | if err != nil { 44 | return nil, err 45 | } 46 | 47 | return createBulkIndexOperation(face) 48 | } 49 | 50 | func createBulkIndexOperation(face *models.FaceData) (*indexer.BulkIndexDoc, error) { 51 | bulkOperation := `{ "index": {} }` + "\n" 52 | 53 | doc, err := esModels.FaceDocFromFaceData(face) 54 | if err != nil { 55 | return nil, err 56 | } 57 | 58 | docJson, err := json.Marshal(doc) 59 | 60 | if err != nil { 61 | return nil, err 62 | } 63 | 64 | docJson = append(docJson, "\n"...) 65 | 66 | bulkUpsertBody := bulkOperation + string(docJson) 67 | 68 | return &indexer.BulkIndexDoc{DocumentId: strconv.Itoa(int(face.ID)), BulkOperation: bulkUpsertBody}, err 69 | 70 | } 71 | -------------------------------------------------------------------------------- /frontend/src/components/camera-feed.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from "react"; 2 | 3 | export class CameraFeed extends Component { 4 | /** 5 | * Processes available devices and identifies one by the label 6 | * @memberof CameraFeed 7 | * @instance 8 | */ 9 | processDevices(devices) { 10 | devices.forEach(device => { 11 | this.setDevice(device); 12 | }); 13 | } 14 | 15 | /** 16 | * Sets the active device and starts playing the feed 17 | * @memberof CameraFeed 18 | * @instance 19 | */ 20 | async setDevice(device) { 21 | const { deviceId } = device; 22 | const stream = await navigator.mediaDevices.getUserMedia({ 23 | audio: false, 24 | video: { deviceId } 25 | }); 26 | this.videoPlayer.srcObject = stream; 27 | this.videoPlayer.play(); 28 | } 29 | 30 | /** 31 | * On mount, grab the users connected devices and process them 32 | * @memberof CameraFeed 33 | * @instance 34 | * @override 35 | */ 36 | async componentDidMount() { 37 | const cameras = await navigator.mediaDevices.enumerateDevices(); 38 | this.processDevices(cameras); 39 | 40 | this.intervalId = setInterval(() => { 41 | this.takePhoto(); 42 | }, 2000); 43 | } 44 | 45 | componentWillUnmount() { 46 | clearInterval(this.intervalId); 47 | this.videoPlayer.pause(); 48 | } 49 | 50 | /** 51 | * Handles taking a still image from the video feed on the camera 52 | * @memberof CameraFeed 53 | * @instance 54 | */ 55 | takePhoto = () => { 56 | const { onFileSubmit } = this.props; 57 | const context = this.canvas.getContext("2d"); 58 | context.drawImage(this.videoPlayer, 0, 0, 1280, 720); 59 | this.canvas.toBlob(onFileSubmit); 60 | }; 61 | 62 | stopTakingPictures = () => { 63 | clearInterval(this.intervalId); 64 | }; 65 | 66 | render() { 67 | return ( 68 |
69 |
70 |
77 |
78 | (this.canvas = ref)} /> 79 |
80 |
81 | ); 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /kafka/changestream/filter.go: -------------------------------------------------------------------------------- 1 | package changestream 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "time" 8 | 9 | kf "github.com/codeuniversity/smag-mvp/kafka" 10 | "github.com/codeuniversity/smag-mvp/worker" 11 | 12 | "github.com/segmentio/kafka-go" 13 | ) 14 | 15 | // Filter is responsible for reading the change stream, 16 | // filtering out the events that are not interesting to us 17 | // and writing new messages based on the changes to the filtered topic 18 | type Filter struct { 19 | *worker.Worker 20 | 21 | changesReader *kafka.Reader 22 | filteredWriter *kafka.Writer 23 | 24 | filterFunc FilterFunc 25 | } 26 | 27 | // FilterFunc given a ChangeMessage from the changesTopic 28 | // returns zero, one or multiple kafka Messages that should be written to the filteredTopic 29 | type FilterFunc func(*ChangeMessage) ([]kafka.Message, error) 30 | 31 | // NewFilter returns an initilized Filter 32 | func NewFilter(kafkaAddress, kafkaGroupID, changesTopic, filteredTopic string, filter FilterFunc) *Filter { 33 | readerConfig := kf.NewReaderConfig(kafkaAddress, kafkaGroupID, changesTopic) 34 | writerConfig := kf.NewWriterConfig(kafkaAddress, filteredTopic, true) 35 | 36 | f := &Filter{ 37 | changesReader: kf.NewReader(readerConfig), 38 | filteredWriter: kf.NewWriter(writerConfig), 39 | filterFunc: filter, 40 | } 41 | 42 | b := worker.Builder{}.WithName(fmt.Sprintf("changestream_filter[%s->%s]", changesTopic, filteredTopic)). 43 | WithWorkStep(f.runStep). 44 | WithStopTimeout(10*time.Second). 45 | AddShutdownHook("changesReader", f.changesReader.Close). 46 | AddShutdownHook("filteredWriter", f.filteredWriter.Close) 47 | 48 | f.Worker = b.MustBuild() 49 | 50 | return f 51 | } 52 | 53 | func (t *Filter) runStep() error { 54 | m, err := t.changesReader.FetchMessage(context.Background()) 55 | if err != nil { 56 | return err 57 | } 58 | 59 | changeMessage := &ChangeMessage{} 60 | err = json.Unmarshal(m.Value, changeMessage) 61 | if err != nil { 62 | return err 63 | } 64 | 65 | kafkaMessages, err := t.filterFunc(changeMessage) 66 | if err != nil { 67 | return err 68 | } 69 | 70 | if len(kafkaMessages) > 0 { 71 | err = t.filteredWriter.WriteMessages(context.Background(), kafkaMessages...) 72 | if err != nil { 73 | return err 74 | } 75 | } 76 | 77 | return t.changesReader.CommitMessages(context.Background(), m) 78 | 79 | } 80 | -------------------------------------------------------------------------------- /api/README.md: -------------------------------------------------------------------------------- 1 | # SMAG gRPC Web API 2 | 3 | ## About 4 | 5 | In our project we are using a [gRPC Web](https://grpc.io/docs/) API. For that we are using an [envoy proxy](https://www.envoyproxy.io/docs/envoy/latest/) to be able to connect to the gRPC Server. As our system is not publicly accessible an AWS Account in our Organisation with the appropriate access is required. 6 | 7 | ## Requirements 8 | 9 | In order to successfully use our api make sure to have: 10 | 11 | - a running [kubernetes setup](https://github.com/codeuniversity/smag-deploy/blob/master/README.md) (permssion required) 12 | - _optional for local testing_: [protoc](http://google.github.io/proto-lens/installing-protoc.html) to generate the protofiles for the frontend 13 | 14 | ## Usage 15 | 16 | To use the production enviroment do the following steps: 17 | 18 | 1. Get name of envoy proxy `kubectl get pods | grep envoy` 19 | 2. Forward the envoy-pod port with `kubectl port-forward envoy-proxy-deployment-6b89675d5b-d86c4 4000:8080` 20 | 3. To make use of the API in the React Frontend import and run the following: 21 | ```javascript 22 | import { 23 | User, 24 | UserNameRequest, 25 | UserIdRequest, 26 | InstaPostsResponse, 27 | UserSearchResponse 28 | } from "./protofiles/client/usersearch_pb.js"; 29 | import { UserSearchServiceClient } from "./protofiles/client/usersearch_grpc_web_pb.js"; 30 | var userSearch = new UserSearchServiceClient("http://localhost:4000"); 31 | var request = new UserName(); 32 | request.setUserName("codeuniversity"); 33 | userSearch.getUserWithUsername(request, {}, function(err, response) { 34 | //example function call... 35 | }); 36 | ``` 37 | 38 | ## Functions 39 | 40 | To check the attributes of the proto messages take a look at the protofile [userserach.proto](https://github.com/codeuniversity/smag-mvp/blob/master/api/proto/usersearch.proto) 41 | 42 | | **Method** | **Function Name** | **Input Message** | **Return Message** | 43 | | ---------- | ------------------------ | ----------------- | ------------------ | 44 | | GET | getUserWithUsername | UserNameRequest | User | 45 | | GET | getAllUsersLikeUsername | UserNameRequest | UserSearchResponse | 46 | | GET | getTaggedPostsWithUserId | UserIdRequest | InstaPostsResponse | 47 | | GET | getInstaPostssWithUserid | UserIdRequest | UserSearchResponse | 48 | -------------------------------------------------------------------------------- /service/executor.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "log" 5 | "os" 6 | "os/signal" 7 | "syscall" 8 | "time" 9 | ) 10 | 11 | //Service is a closeable service that usually includes an Executor 12 | type Service interface { 13 | Close() 14 | } 15 | 16 | // CloseOnSignal calls the closeFunc on the os signals SIGINT and SIGTERM 17 | func CloseOnSignal(s Service) { 18 | go func() { 19 | signals := make(chan os.Signal, 1) 20 | signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM) 21 | sig := <-signals 22 | log.Println("Received Signal:", sig) 23 | 24 | s.Close() 25 | }() 26 | } 27 | 28 | //Executor handles gracefully closing execution for services 29 | //Closing an executor goes through the states running, stopping, stopped and closed. 30 | type Executor struct { 31 | stopChan chan struct{} 32 | stoppedChan chan struct{} 33 | closedChan chan struct{} 34 | } 35 | 36 | //New returns an Executor ready for use. 37 | func New() *Executor { 38 | return &Executor{ 39 | stopChan: make(chan struct{}, 1), 40 | stoppedChan: make(chan struct{}, 1), 41 | closedChan: make(chan struct{}, 1), 42 | } 43 | } 44 | 45 | //IsRunning should be used to determine if the execution should be halted. 46 | // Will be false until Close() is called 47 | func (e *Executor) IsRunning() bool { 48 | return len(e.stopChan) == 0 49 | } 50 | 51 | // Stop is to be called when the executor should stop 52 | // This is only safe to call once 53 | func (e *Executor) Stop() { 54 | e.stopChan <- struct{}{} 55 | } 56 | 57 | //MarkAsStopped is to be called when the execution was stopped. 58 | //All used resources can be cracefully closed and dispossed of now 59 | func (e *Executor) MarkAsStopped() { 60 | e.stoppedChan <- struct{}{} 61 | } 62 | 63 | // WaitUntilStopped blocks until either MarkAsStopped() was called in any goroutine 64 | // or the timeout has passed 65 | func (e *Executor) WaitUntilStopped(timeout time.Duration) { 66 | t := time.NewTimer(timeout) 67 | select { 68 | case <-t.C: 69 | break 70 | case <-e.stoppedChan: 71 | t.Stop() 72 | break 73 | } 74 | } 75 | 76 | //MarkAsClosed is to be called when the service using the executor is closed. 77 | //The process can be stopped or killed now 78 | func (e *Executor) MarkAsClosed() { 79 | e.closedChan <- struct{}{} 80 | } 81 | 82 | // WaitUntilClosed waits until the Close func call of the executor is finished 83 | func (e *Executor) WaitUntilClosed() { 84 | <-e.closedChan 85 | } 86 | -------------------------------------------------------------------------------- /api/proto/usersearch.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package proto; 4 | 5 | message User { 6 | string id = 1; 7 | string user_name = 2; 8 | string real_name = 3; 9 | string bio = 4; 10 | string avatar_url = 5; 11 | repeated User followings = 6; 12 | repeated User followers = 7; 13 | } 14 | 15 | message UserSearchResponse { 16 | repeated User user_list = 1; 17 | } 18 | 19 | message UserNameRequest { 20 | string user_name = 1; 21 | } 22 | 23 | message UserIdRequest { 24 | string user_id = 1; 25 | } 26 | 27 | message InstaPost { 28 | string id = 1; 29 | string post_id = 2; 30 | string img_url = 3; 31 | string caption = 4; 32 | repeated User tagged_users = 5; 33 | string short_code = 6; 34 | 35 | } 36 | 37 | message InstaPostsResponse { 38 | string user_id = 1; 39 | repeated InstaPost insta_posts = 2; 40 | } 41 | 42 | message FaceSearchRequest { 43 | string base64EncodedPicture = 1; 44 | } 45 | 46 | message Face { 47 | int32 x = 1; 48 | int32 y = 2; 49 | int32 width = 3; 50 | int32 height = 4; 51 | int32 post_id = 5; 52 | string full_image_src = 6; 53 | } 54 | 55 | message FaceSearchResponse { 56 | repeated Face faces = 1; 57 | } 58 | 59 | message WeightedPostWithFaces { 60 | int32 post_id = 1; 61 | int32 weight = 2; 62 | repeated Face faces = 3; 63 | } 64 | 65 | message WeightedPosts{ 66 | repeated WeightedPostWithFaces posts = 1; 67 | } 68 | 69 | message UserWithFaces { 70 | User user = 1; 71 | repeated Face faces = 2; 72 | int32 weight = 3; 73 | } 74 | 75 | message WeightedUsers { 76 | repeated UserWithFaces users_with_faces = 1; 77 | } 78 | 79 | message UserDataPointCount { 80 | int32 count = 1; 81 | } 82 | 83 | message FoundCities { 84 | repeated string city_names = 1; 85 | } 86 | 87 | service UserSearchService { 88 | rpc GetAllUsersLikeUsername(UserNameRequest) returns (UserSearchResponse); 89 | rpc GetUserWithUserId(UserIdRequest) returns (User); 90 | rpc GetUserWithUsername(UserNameRequest) returns (User); 91 | rpc GetInstaPostsWithUserId(UserIdRequest) returns (InstaPostsResponse); 92 | rpc SearchSimilarFaces(FaceSearchRequest) returns(FaceSearchResponse); 93 | rpc SearchUsersWithWeightedPosts(WeightedPosts) returns (WeightedUsers); 94 | rpc DataPointCountForUserId(UserIdRequest) returns (UserDataPointCount); 95 | rpc FindCitiesForUserId(UserIdRequest) returns (FoundCities); 96 | } 97 | -------------------------------------------------------------------------------- /twitter/inserter/posts/inserter.go: -------------------------------------------------------------------------------- 1 | package inserter 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "log" 8 | "time" 9 | 10 | "github.com/jinzhu/gorm" 11 | // necessary for gorm :pointup: 12 | _ "github.com/jinzhu/gorm/dialects/postgres" 13 | 14 | dbUtils "github.com/codeuniversity/smag-mvp/db" 15 | "github.com/codeuniversity/smag-mvp/twitter/models" 16 | "github.com/codeuniversity/smag-mvp/utils" 17 | "github.com/codeuniversity/smag-mvp/worker" 18 | 19 | "github.com/segmentio/kafka-go" 20 | ) 21 | 22 | // Inserter represents the inserter containing all clients it uses 23 | type Inserter struct { 24 | *worker.Worker 25 | 26 | qReader *kafka.Reader 27 | 28 | db *gorm.DB 29 | } 30 | 31 | // New returns an initilized inserter 32 | func New(postgresHost, postgresPassword, dbName string, qReader *kafka.Reader) *Inserter { 33 | i := &Inserter{} 34 | i.qReader = qReader 35 | 36 | connectionString := fmt.Sprintf("host=%s user=postgres dbname=%s sslmode=disable", postgresHost, dbName) 37 | if postgresPassword != "" { 38 | connectionString += " " + "password=" + postgresPassword 39 | } 40 | 41 | db, err := gorm.Open("postgres", connectionString) 42 | utils.PanicIfNotNil(err) 43 | i.db = db // use db.Debug() here to get detailed gorm logs 44 | 45 | db.AutoMigrate(&models.TwitterPost{}) 46 | 47 | b := worker.Builder{}.WithName("twitter_inserter_posts"). 48 | WithWorkStep(i.runStep). 49 | WithStopTimeout(10*time.Second). 50 | AddShutdownHook("qReader", qReader.Close). 51 | AddShutdownHook("postgres_client", db.Close) 52 | 53 | i.Worker = b.MustBuild() 54 | 55 | return i 56 | } 57 | 58 | // Run the inserter 59 | func (i *Inserter) runStep() error { 60 | m, err := i.qReader.FetchMessage(context.Background()) 61 | if err != nil { 62 | return err 63 | } 64 | 65 | rawPost := &models.TwitterPostRaw{} 66 | 67 | err = json.Unmarshal(m.Value, rawPost) 68 | if err != nil { 69 | return err 70 | } 71 | 72 | post := models.ConvertTwitterPost(rawPost) 73 | log.Println("inserting post:", post.Link) 74 | 75 | err = i.insertPost(post) 76 | if err != nil { 77 | return err 78 | } 79 | return i.qReader.CommitMessages(context.Background(), m) 80 | } 81 | 82 | func (i *Inserter) insertPost(post *models.TwitterPost) error { 83 | fromPost := &models.TwitterPost{} 84 | filter := &models.TwitterPost{PostIdentifier: post.PostIdentifier} 85 | 86 | err := dbUtils.CreateOrUpdate(i.db, fromPost, filter, post) 87 | if err != nil { 88 | return err 89 | } 90 | 91 | return nil 92 | } 93 | -------------------------------------------------------------------------------- /twitter/inserter/users/inserter.go: -------------------------------------------------------------------------------- 1 | package inserter 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "log" 8 | "time" 9 | 10 | "github.com/jinzhu/gorm" 11 | // necessary for gorm :pointup: 12 | _ "github.com/jinzhu/gorm/dialects/postgres" 13 | 14 | dbUtils "github.com/codeuniversity/smag-mvp/db" 15 | "github.com/codeuniversity/smag-mvp/twitter/models" 16 | "github.com/codeuniversity/smag-mvp/utils" 17 | "github.com/codeuniversity/smag-mvp/worker" 18 | 19 | "github.com/segmentio/kafka-go" 20 | ) 21 | 22 | // Inserter represents the scraper containing all clients it uses 23 | type Inserter struct { 24 | *worker.Worker 25 | 26 | qReader *kafka.Reader 27 | 28 | db *gorm.DB 29 | } 30 | 31 | // New returns an initilized inserter 32 | func New(postgresHost, postgresPassword, dbName string, qReader *kafka.Reader) *Inserter { 33 | i := &Inserter{} 34 | i.qReader = qReader 35 | 36 | connectionString := fmt.Sprintf("host=%s user=postgres dbname=%s sslmode=disable", postgresHost, dbName) 37 | if postgresPassword != "" { 38 | connectionString += " " + "password=" + postgresPassword 39 | } 40 | 41 | db, err := gorm.Open("postgres", connectionString) 42 | utils.PanicIfNotNil(err) 43 | i.db = db // use db.Debug() here to get detailed gorm logs 44 | 45 | db.AutoMigrate(&models.TwitterUser{}) 46 | 47 | b := worker.Builder{}.WithName("twitter_inserter_users"). 48 | WithWorkStep(i.runStep). 49 | WithStopTimeout(10*time.Second). 50 | AddShutdownHook("qReader", qReader.Close). 51 | AddShutdownHook("postgres_client", db.Close) 52 | 53 | i.Worker = b.MustBuild() 54 | 55 | return i 56 | } 57 | 58 | func (i *Inserter) runStep() error { 59 | m, err := i.qReader.FetchMessage(context.Background()) 60 | if err != nil { 61 | return err 62 | } 63 | 64 | rawUser := &models.TwitterUserRaw{} 65 | 66 | err = json.Unmarshal(m.Value, rawUser) 67 | if err != nil { 68 | return err 69 | } 70 | 71 | user := models.ConvertTwitterUser(rawUser) 72 | log.Println("inserting user: ", user.Username) 73 | 74 | err = i.insertUser(user) 75 | if err != nil { 76 | return err 77 | } 78 | 79 | return i.qReader.CommitMessages(context.Background(), m) 80 | } 81 | 82 | func (i *Inserter) insertUser(user *models.TwitterUser) error { 83 | var err error 84 | 85 | baseUser := &models.TwitterUser{} 86 | filter := &models.TwitterUser{Username: user.Username} 87 | 88 | err = dbUtils.CreateOrUpdate(i.db, baseUser, filter, user) 89 | if err != nil { 90 | return err 91 | } 92 | 93 | return nil 94 | } 95 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | ## Social Record front-end 2 | 3 | The Social Record front-end displays the analyzed data from instagram of the visitor of the exhibition. Using the front-end the visitors can experience what public data social media providers expose about them. 4 | 5 | ### Prerequisities 6 | 7 | | | | | | | 8 | | -------------- | ------------------------------------------------------------------ | --- | --- | --- | 9 | | Node.js | `v10.16.0 | 10 | | Go | `go 1.13` ([go modules](https://blog.golang.org/using-go-modules)) | | | | 11 | | Docker | `v19.x` | | | | 12 | | Docker Compose | `v1.25.x` | | | | 13 | 14 | ### Getting started 15 | 16 | **1. Clone repo** [`https://github.com/codeuniversity/smag-mvp.git`](https://github.com/codeuniversity/smag-mvp.git) 17 | 18 | **2. Switch to the front-end folder** `cd smag-mvp/frontend` 19 | 20 | **3. Install all dependencies** `npm install` 21 | 22 | **4. Run the application in development mode** `npm start` (runs on `localhost:3000`) 23 | 24 | **5. To deploy to production you can create a minified bundle** `npm run build` 25 | 26 | **6. Run all services in docker to locally test the application.** 27 | 28 | 1. Start all services `make run` 29 | 2. Add `127.0.0.1 my-kafka` and `127.0.0.1. minio` to your `/etc/hosts` file 30 | 3. Choose a user_name as a starting point and run `go run cli/main/main.go instagram ` 31 | 32 | ### React Component Design 33 | 34 | | Name | Prop Name | Data Structure | Example | Description | 35 | | --------------- | ------------ | -------------- | --------------------- | ------------------------------------------------------------------ | 36 | | `` | h1 | String | `""` | Displays the h1 headline in the App component and Start component. | 37 | | `` | key | Function | `(post.shortcode)` | Displays the instagram post in the Result component. | 38 | | `` | onSubmit | Function | `(name.string)=>void` | Displays the search form in the App component. | 39 | | `Camera-Feed/>` | onFileSubmit | Function | `(file:File)=>void` | Implements the camera feed in the Start component. | 40 | -------------------------------------------------------------------------------- /frontend/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 10 | 15 | 20 | 25 | 30 | 35 | 40 | 45 | 50 | 56 | 62 | 68 | 74 | 75 | 76 | 77 | 78 | 79 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | SMAG 91 | 92 | 93 | 94 |
95 | 96 | 97 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | We are always happy about support for our project. If you dicide to help us spreading awareness for privacy, just start with the following steps: 4 | 1) Fork the project 5 | 2) Create a new branch 6 | 3) Commit your changes 7 | 4) Open a pull request to `master` 8 | 9 | (More about this in ["Branching & Naming"](#branching--naming)) 10 | 11 | For your Git Commit Messages, please orientate on the guidance in the following [article](https://chris.beams.io/posts/git-commit/): 12 | 13 | > - Limit the subject line to 50 characters 14 | > - Capitalize the subject line 15 | > - Use the imperative mood in the subject line 16 | > - Use the body to explain what and why, less how 17 | 18 | ### Pull Requests 19 | 20 | Every new feature must be developed on a feature branch and merged into master. **Please do not push directly to master!** We also provide a [Pull Request Template](https://github.com/codeuniversity/smag-mvp/blob/master/.github/pull_request_template.md) for additional guidance. In any case, the Pull Request has to be reviewed and approved by at least one other developer before merging. Please make sure to [reference the associated issue(s)](https://help.github.com/en/github/managing-your-work-on-github/closing-issues-using-keywords) in the pull request. 21 | 22 | ## Branching & Naming 23 | 24 | Next to `feature/` and `fix/` branches, we also have a `master` and a `production` branch. `master` is our development branch were new code is merged into first - release versions for roll-out will be merged into `production`. 25 | 26 | For the naming of software components, please orientate on the existing components and folders. Especially if you build something specific to one of the platforms (e.g `insta(gram)`, `twitter`, ...), please make sure to use the regarding prefix for folder-names. Else, please stick to formatting conventions for [Golang](https://golang.org/doc/effective_go.html), [Python](https://www.python.org/dev/peps/pep-0008/) and [React.js](https://hackernoon.com/structuring-projects-and-naming-components-in-react-1261b6e18d76). 27 | 28 | ## Task Management 29 | 30 | For our task management, we are using the [ZenHub GitHub Extension](https://www.zenhub.com/extension) which integrates a project board into GitHub. After installing the extension and reloading your browser, you will be able to see an addional `ZenHub` Tab in our repo. In there, you can see our current tasks `"In Progress"` and upcomming tasks `"ToDo"` of the current release we are working on. All tasks are represented as GitHub issues as well, so you might want to [create an own GitHub issue](https://github.com/codeuniversity/smag-mvp/issues/new/choose) for the beginning. 31 | 32 | If you have any questions or want to get more involved in the project, feel free to approach the team via: [socialrecord-project[at]code.berlin](socialrecord-project@code.berlin). 33 | -------------------------------------------------------------------------------- /insta/indexer/comments/insta_comments_indexer.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "strconv" 6 | 7 | "github.com/codeuniversity/smag-mvp/elastic" 8 | "github.com/codeuniversity/smag-mvp/elastic/indexer" 9 | "github.com/codeuniversity/smag-mvp/elastic/models" 10 | "github.com/codeuniversity/smag-mvp/kafka/changestream" 11 | "github.com/codeuniversity/smag-mvp/service" 12 | "github.com/codeuniversity/smag-mvp/utils" 13 | ) 14 | 15 | func main() { 16 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "my-kafka:9092") 17 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID") 18 | bulkChunkSize := utils.GetNumberFromEnvWithDefault("BULK_CHUNK_SIZE", 10) 19 | changesTopic := utils.GetStringFromEnvWithDefault("KAFKA_CHANGE_TOPIC", "postgres.public.posts") 20 | bulkFetchTimeoutSeconds := utils.GetNumberFromEnvWithDefault("BULK_FETCH_TIMEOUT_SECONDS", 5) 21 | esHosts := utils.GetMultipleStringsFromEnvWithDefault("ES_HOSTS", []string{"localhost:9201"}) 22 | 23 | i := indexer.New(esHosts, elastic.CommentsIndex, elastic.CommentsIndexMapping, kafkaAddress, changesTopic, groupID, indexComment, bulkChunkSize, bulkFetchTimeoutSeconds) 24 | 25 | service.CloseOnSignal(i) 26 | waitUntilClosed := i.Start() 27 | 28 | waitUntilClosed() 29 | } 30 | 31 | func indexComment(m *changestream.ChangeMessage) (*indexer.BulkIndexDoc, error) { 32 | comment := &models.InstaComment{} 33 | err := json.Unmarshal(m.Payload.After, comment) 34 | 35 | if err != nil { 36 | return nil, err 37 | } 38 | 39 | switch m.Payload.Op { 40 | case "r", "c": 41 | return createBulkUpsertOperation(comment) 42 | } 43 | 44 | return nil, nil 45 | } 46 | 47 | func createBulkUpsertOperation(comment *models.InstaComment) (*indexer.BulkIndexDoc, error) { 48 | var bulkOperation = map[string]interface{}{ 49 | "update": map[string]interface{}{ 50 | "_id": comment.ID, 51 | "_index": elastic.CommentsIndex, 52 | }, 53 | } 54 | 55 | bulkOperationJson, err := json.Marshal(bulkOperation) 56 | if err != nil { 57 | return nil, err 58 | } 59 | 60 | bulkOperationJson = append(bulkOperationJson, "\n"...) 61 | var commentUpsert = map[string]interface{}{ 62 | "script": map[string]interface{}{ 63 | "source": "ctx._source.comment = params.comment", 64 | "lang": "painless", 65 | "params": map[string]interface{}{ 66 | "comment": comment.Comment, 67 | }, 68 | }, 69 | "upsert": map[string]interface{}{ 70 | "post_id": comment.PostID, 71 | "comment": comment.Comment, 72 | }, 73 | } 74 | 75 | commentUpsertJson, err := json.Marshal(commentUpsert) 76 | 77 | if err != nil { 78 | return nil, err 79 | } 80 | 81 | commentUpsertJson = append(commentUpsertJson, "\n"...) 82 | 83 | bulkUpsertBody := string(bulkOperationJson) + string(commentUpsertJson) 84 | 85 | return &indexer.BulkIndexDoc{DocumentId: strconv.Itoa(comment.ID), BulkOperation: bulkUpsertBody}, err 86 | } 87 | -------------------------------------------------------------------------------- /neo4j/inserter/inserter.go: -------------------------------------------------------------------------------- 1 | package neo4jinserter 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "log" 8 | "time" 9 | 10 | "github.com/codeuniversity/smag-mvp/kafka/changestream" 11 | "github.com/codeuniversity/smag-mvp/utils" 12 | "github.com/codeuniversity/smag-mvp/worker" 13 | 14 | "github.com/neo4j/neo4j-go-driver/neo4j" 15 | kf "github.com/segmentio/kafka-go" 16 | ) 17 | 18 | // Inserter represents the scraper containing all clients it uses 19 | type Inserter struct { 20 | *worker.Worker 21 | 22 | qReader *kf.Reader 23 | driver neo4j.Driver 24 | session neo4j.Session 25 | 26 | inserterFunc InserterFunc 27 | } 28 | 29 | // InserterFunc is responsible to unmashal to the 30 | // needed Data from the change Message and inserts 31 | // it into neo4j 32 | type InserterFunc func(*changestream.ChangeMessage, neo4j.Session) error 33 | 34 | // New returns an initilized scraper 35 | func New(neo4jConfig *utils.Neo4jConfig, userQReader *kf.Reader, inserterFunc InserterFunc) *Inserter { 36 | i := &Inserter{} 37 | 38 | i.qReader = userQReader 39 | i.inserterFunc = inserterFunc 40 | 41 | session, driver, err := initializeNeo4j(neo4jConfig) 42 | if err != nil { 43 | panic(err) 44 | } 45 | i.session = session 46 | i.driver = driver 47 | 48 | log.Println("✅ Neo4j Connection established") 49 | 50 | i.Worker = worker.Builder{}.WithName("neo4j-inserter"). 51 | WithWorkStep(i.runStep). 52 | WithStopTimeout(10*time.Second). 53 | AddShutdownHook("userQReader", userQReader.Close). 54 | AddShutdownHook("Neo4j Driver", driver.Close). 55 | AddShutdownHook("Neo4j Session", session.Close). 56 | MustBuild() 57 | 58 | return i 59 | } 60 | 61 | // runStep the inserter 62 | func (i *Inserter) runStep() error { 63 | m, err := i.qReader.FetchMessage(context.Background()) 64 | 65 | if err != nil { 66 | return err 67 | } 68 | 69 | changeMessage := &changestream.ChangeMessage{} 70 | 71 | err = json.Unmarshal(m.Value, changeMessage) 72 | 73 | if err != nil { 74 | return err 75 | } 76 | 77 | err = i.inserterFunc(changeMessage, i.session) 78 | 79 | if err != nil { 80 | return err 81 | } 82 | 83 | log.Println("Inserted") 84 | return i.qReader.CommitMessages(context.Background(), m) 85 | } 86 | 87 | //initializeNeo4j sets connection and constraints for neo4j 88 | func initializeNeo4j(config *utils.Neo4jConfig) (neo4j.Session, neo4j.Driver, error) { 89 | address := fmt.Sprintf("bolt://%s:7687", config.Host) 90 | driver, err := neo4j.NewDriver(address, neo4j.BasicAuth(config.Username, config.Password, "")) 91 | if err != nil { 92 | return nil, nil, err 93 | } 94 | 95 | session, err := driver.Session(neo4j.AccessModeWrite) 96 | if err != nil { 97 | return nil, nil, err 98 | } 99 | 100 | _, err = session.Run("CREATE CONSTRAINT ON (U:USER) ASSERT U.id IS UNIQUE", nil) 101 | if err != nil { 102 | return nil, nil, err 103 | } 104 | 105 | return session, driver, nil 106 | } 107 | -------------------------------------------------------------------------------- /twitter/scraper/twitterscraper/scraper_manager.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import traceback 4 | 5 | from kafka import KafkaProducer, KafkaConsumer 6 | 7 | 8 | class ScraperManager(object): 9 | 10 | name = "scraper_manager" 11 | 12 | def __init__( 13 | self, 14 | fetch_topic: str, 15 | insert_topic: str, 16 | kafka_consumer_group: str = "scraper_manager", 17 | kafka_address: str = "localhost:9092", 18 | ): 19 | self.consumer = KafkaConsumer( 20 | fetch_topic, 21 | bootstrap_servers=kafka_address, 22 | group_id=kafka_consumer_group, 23 | reconnect_backoff_ms=500, 24 | reconnect_backoff_max_ms=10000, 25 | max_poll_interval_ms=600000, 26 | ) 27 | self.producer = KafkaProducer( 28 | bootstrap_servers=kafka_address, 29 | value_serializer=lambda v: json.dumps(v).encode('utf-8'), 30 | reconnect_backoff_ms=500, 31 | reconnect_backoff_max_ms=10000, 32 | request_timeout_ms=600000, 33 | ) 34 | self.insert_topic = insert_topic 35 | 36 | def run(self): 37 | try: 38 | while True: 39 | self.consume_scrape_produce() 40 | except Exception: 41 | logging.error( 42 | "Caught error. Going to flush KafkaProducer and then throw error further." 43 | ) 44 | self.producer.flush() 45 | raise 46 | 47 | def consume_scrape_produce(self) -> None: 48 | """ 49 | Consumes from kafka, 50 | scrapes via custom function, 51 | and produces/sends scraped msges to kafka 52 | """ 53 | 54 | for msg in self.consumer: 55 | user_name = msg.value.decode("utf-8") 56 | try: 57 | self.scrape_and_produce(user_name) 58 | except Exception: 59 | self.consumer.commit() 60 | traceback.print_exc() 61 | logging.error(f"Couldn't scrape user {user_name}. Continuing") 62 | 63 | def scrape_and_produce(self, user_name: str) -> None: 64 | msg = self.scrape(user_name) 65 | msg_list = msg if type(msg) is list else [msg] 66 | for m in msg_list: 67 | self.produce(m) 68 | logging.info( 69 | f"Done sending {len(msg_list)} element(s) to kafka/{self.insert_topic}" 70 | ) 71 | 72 | def scrape(self, user_name: str): 73 | """This method will be implemented by the user to scrape either user-profile or tweets""" 74 | raise NotImplementedError( 75 | "You need to implement a scrape(user_name: str) method, " 76 | "which returns an object to be written to kafka." 77 | ) 78 | 79 | def produce(self, msg) -> None: 80 | topic = self.insert_topic 81 | logging.debug( 82 | f"{self.name} sends msg (from {msg.username}) to kafka/{topic}" 83 | ) 84 | msg_dict = getattr(msg, "__dict__") 85 | self.producer.send(topic, msg_dict) 86 | -------------------------------------------------------------------------------- /insta/indexer/users/insta_users_indexer.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "strconv" 6 | 7 | "github.com/codeuniversity/smag-mvp/elastic" 8 | "github.com/codeuniversity/smag-mvp/elastic/indexer" 9 | "github.com/codeuniversity/smag-mvp/elastic/models" 10 | "github.com/codeuniversity/smag-mvp/kafka/changestream" 11 | "github.com/codeuniversity/smag-mvp/service" 12 | "github.com/codeuniversity/smag-mvp/utils" 13 | ) 14 | 15 | func main() { 16 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "my-kafka:9092") 17 | groupID := utils.GetStringFromEnvWithDefault("KAFKA_GROUPID", "insta_usersearch-inserter") 18 | changesTopic := utils.GetStringFromEnvWithDefault("KAFKA_CHANGE_TOPIC", "postgres.public.users") 19 | bulkChunkSize := utils.GetNumberFromEnvWithDefault("BULK_CHUNK_SIZE", 10) 20 | bulkFetchTimeoutSeconds := utils.GetNumberFromEnvWithDefault("BULK_FETCH_TIMEOUT_SECONDS", 5) 21 | esHosts := utils.GetMultipleStringsFromEnvWithDefault("ES_HOSTS", []string{"http://localhost:9201"}) 22 | 23 | i := indexer.New(esHosts, elastic.UsersIndex, elastic.UsersIndexMapping, kafkaAddress, changesTopic, groupID, handleChangeMessage, bulkChunkSize, bulkFetchTimeoutSeconds) 24 | 25 | service.CloseOnSignal(i) 26 | waitUntilClosed := i.Start() 27 | 28 | waitUntilClosed() 29 | } 30 | 31 | // handleChangeMessage filters relevant events and upserts them 32 | func handleChangeMessage(m *changestream.ChangeMessage) (*indexer.BulkIndexDoc, error) { 33 | user := &models.InstaUser{} 34 | if err := json.Unmarshal(m.Payload.After, user); err != nil { 35 | return nil, err 36 | } 37 | 38 | switch m.Payload.Op { 39 | case "c", "r", "u": 40 | return createBulkUpsertOperation(user) 41 | } 42 | return nil, nil 43 | } 44 | 45 | func createBulkUpsertOperation(user *models.InstaUser) (*indexer.BulkIndexDoc, error) { 46 | var bulkOperation = map[string]interface{}{ 47 | "update": map[string]interface{}{ 48 | "_id": user.ID, 49 | "_index": elastic.UsersIndex, 50 | }, 51 | } 52 | 53 | bulkOperationJson, err := json.Marshal(bulkOperation) 54 | 55 | if err != nil { 56 | return nil, err 57 | } 58 | 59 | bulkOperationJson = append(bulkOperationJson, "\n"...) 60 | 61 | var usersUpsert = map[string]interface{}{ 62 | "script": map[string]interface{}{ 63 | "source": "ctx._source.user_name = params.user_name; ctx._source.real_name = params.real_name; ctx._source.bio = params.bio", 64 | "lang": "painless", 65 | "params": map[string]interface{}{ 66 | "user_name": user.Username, 67 | "real_name": user.Realname, 68 | "bio": user.Bio, 69 | }, 70 | }, 71 | "upsert": map[string]interface{}{ 72 | "user_name": user.Username, 73 | "real_name": user.Realname, 74 | "bio": user.Bio, 75 | }, 76 | } 77 | 78 | usersUpsertJson, err := json.Marshal(usersUpsert) 79 | 80 | if err != nil { 81 | return nil, err 82 | } 83 | 84 | usersUpsertJson = append(usersUpsertJson, "\n"...) 85 | 86 | bulkUpsertBody := string(bulkOperationJson) + string(usersUpsertJson) 87 | 88 | return &indexer.BulkIndexDoc{DocumentId: strconv.Itoa(user.ID), BulkOperation: bulkUpsertBody}, err 89 | } 90 | -------------------------------------------------------------------------------- /insta/indexer/posts/insta_posts_indexer.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "strconv" 6 | 7 | "github.com/codeuniversity/smag-mvp/elastic" 8 | "github.com/codeuniversity/smag-mvp/elastic/indexer" 9 | "github.com/codeuniversity/smag-mvp/elastic/models" 10 | "github.com/codeuniversity/smag-mvp/kafka/changestream" 11 | "github.com/codeuniversity/smag-mvp/service" 12 | "github.com/codeuniversity/smag-mvp/utils" 13 | ) 14 | 15 | func main() { 16 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "my-kafka:9092") 17 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID") 18 | bulkChunkSize := utils.GetNumberFromEnvWithDefault("BULK_CHUNK_SIZE", 10) 19 | changesTopic := utils.GetStringFromEnvWithDefault("KAFKA_CHANGE_TOPIC", "postgres.public.posts") 20 | bulkFetchTimeoutSeconds := utils.GetNumberFromEnvWithDefault("BULK_FETCH_TIMEOUT_SECONDS", 5) 21 | esHosts := utils.GetMultipleStringsFromEnvWithDefault("ES_HOSTS", []string{"localhost:9201"}) 22 | 23 | i := indexer.New(esHosts, elastic.PostsIndex, elastic.PostsIndexMapping, kafkaAddress, changesTopic, groupID, indexPost, bulkChunkSize, bulkFetchTimeoutSeconds) 24 | 25 | service.CloseOnSignal(i) 26 | waitUntilClosed := i.Start() 27 | 28 | waitUntilClosed() 29 | } 30 | 31 | func indexPost(m *changestream.ChangeMessage) (*indexer.BulkIndexDoc, error) { 32 | currentPost := &models.InstaPost{} 33 | err := json.Unmarshal(m.Payload.After, currentPost) 34 | 35 | if err != nil { 36 | return nil, err 37 | } 38 | 39 | switch m.Payload.Op { 40 | case "r", "c": 41 | return createBulkUpsertOperation(currentPost) 42 | case "u": 43 | previousPost := &models.InstaPost{} 44 | err := json.Unmarshal(m.Payload.Before, previousPost) 45 | 46 | if err != nil { 47 | return nil, err 48 | } 49 | 50 | if previousPost.Caption != currentPost.Caption { 51 | return createBulkUpsertOperation(currentPost) 52 | } 53 | } 54 | 55 | return nil, nil 56 | } 57 | 58 | func createBulkUpsertOperation(post *models.InstaPost) (*indexer.BulkIndexDoc, error) { 59 | var bulkOperation = map[string]interface{}{ 60 | "update": map[string]interface{}{ 61 | "_id": post.ID, 62 | "_index": elastic.PostsIndex, 63 | }, 64 | } 65 | 66 | bulkOperationJson, err := json.Marshal(bulkOperation) 67 | 68 | if err != nil { 69 | return nil, err 70 | } 71 | 72 | bulkOperationJson = append(bulkOperationJson, "\n"...) 73 | 74 | var commentUpsert = map[string]interface{}{ 75 | "script": map[string]interface{}{ 76 | "source": "ctx._source.caption = params.caption", 77 | "lang": "painless", 78 | "params": map[string]interface{}{ 79 | "caption": post.Caption, 80 | }, 81 | }, 82 | "upsert": map[string]interface{}{ 83 | "user_id": post.UserID, 84 | "caption": post.Caption, 85 | }, 86 | } 87 | 88 | postUpsertJson, err := json.Marshal(commentUpsert) 89 | 90 | if err != nil { 91 | return nil, err 92 | } 93 | 94 | postUpsertJson = append(postUpsertJson, "\n"...) 95 | bulkUpsertBody := string(bulkOperationJson) + string(postUpsertJson) 96 | 97 | return &indexer.BulkIndexDoc{DocumentId: strconv.Itoa(post.ID), BulkOperation: bulkUpsertBody}, err 98 | } 99 | -------------------------------------------------------------------------------- /utils/utils.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | "strconv" 8 | "strings" 9 | "time" 10 | 11 | "github.com/google/uuid" 12 | ) 13 | 14 | //WithRetries calls f up to the given `times` and returns the last error if times is reached 15 | func WithRetries(times int, f func() error) error { 16 | var err error 17 | for i := 0; i < times; i++ { 18 | err = f() 19 | if err == nil { 20 | return nil 21 | } 22 | log.Println(err) 23 | time.Sleep(100 * time.Millisecond) 24 | } 25 | return err 26 | } 27 | 28 | //GetStringFromEnvWithDefault returns default Value if OS Environment Variable is not set 29 | func GetStringFromEnvWithDefault(enVarName, defaultValue string) string { 30 | envValue := os.Getenv(enVarName) 31 | if envValue == "" { 32 | return defaultValue 33 | } 34 | 35 | return envValue 36 | } 37 | 38 | // GetNumberFromEnvWithDefault returns default Value if OS Environment Variable is not set 39 | func GetNumberFromEnvWithDefault(envVarName string, defaultValue int) int { 40 | envValue := os.Getenv(envVarName) 41 | number, err := strconv.ParseInt(envValue, 10, 64) 42 | if err != nil { 43 | return defaultValue 44 | } 45 | 46 | return int(number) 47 | } 48 | 49 | //MustGetStringFromEnv panics if OS Environment Variable is not set 50 | func MustGetStringFromEnv(enVarName string) string { 51 | envValue := os.Getenv(enVarName) 52 | if envValue == "" { 53 | panic(fmt.Sprintf("%s must not be empty", enVarName)) 54 | } 55 | 56 | return envValue 57 | } 58 | 59 | func GetMultipleStringsFromEnvWithDefault(envVarName string, defaultValue []string) []string { 60 | envValue := os.Getenv(envVarName) 61 | if envValue == "" { 62 | return defaultValue 63 | } 64 | envValues := strings.Split(envValue, ",") 65 | return envValues 66 | } 67 | 68 | // GetBoolFromEnvWithDefault parses an OS Environment Variable as bool 69 | func GetBoolFromEnvWithDefault(enVarName string, defaultValue bool) bool { 70 | envValue := os.Getenv(enVarName) 71 | if envValue == "" { 72 | return defaultValue 73 | } 74 | 75 | envBool, err := strconv.ParseBool(envValue) 76 | if err != nil { 77 | panic(fmt.Errorf("couldn't parse %s as bool: %s", enVarName, err)) 78 | } 79 | 80 | return envBool 81 | } 82 | 83 | // PanicIfNotNil panics if err != nil 84 | func PanicIfNotNil(err error) { 85 | if err != nil { 86 | //TODO: graceful shutdown 87 | panic(err) 88 | } 89 | } 90 | 91 | // MustBeNil panics if err != nil 92 | func MustBeNil(err error) { 93 | if err != nil { 94 | panic(err) 95 | } 96 | } 97 | 98 | // ConvertIntToBool converts an integer to a bool (binary) 99 | func ConvertIntToBool(value int) bool { 100 | if value == 1 { 101 | return true 102 | } 103 | return false 104 | } 105 | 106 | // ConvertDateStrToTime converts a dateStr to a time.Time obj 107 | func ConvertDateStrToTime(dateStr string) (time.Time, error) { 108 | return time.Parse("02 Jan 2006", dateStr) 109 | } 110 | 111 | // RandUUIDSeq returns a random uuid string 112 | func RandUUIDSeq() string { 113 | id, err := uuid.NewRandom() 114 | MustBeNil(err) 115 | 116 | return id.String() 117 | } 118 | --------------------------------------------------------------------------------