├── frontend
├── src
│ ├── App.css
│ ├── components
│ │ ├── img
│ │ │ ├── fb.png
│ │ │ ├── twitter.png
│ │ │ ├── instagram.png
│ │ │ ├── linkedin.png
│ │ │ └── chevron-left-solid.svg
│ │ ├── H1.js
│ │ ├── H2.js
│ │ ├── FaceHitAnimation.css
│ │ ├── InterestFooter.jsx
│ │ ├── BackButton.js
│ │ ├── PostsCard.jsx
│ │ ├── Button.jsx
│ │ ├── StatsCard.jsx
│ │ ├── ProfileCard.jsx
│ │ ├── BioCard.jsx
│ │ ├── IGPost.js
│ │ ├── LocationCard.jsx
│ │ ├── EndButton.jsx
│ │ ├── InterestCard.jsx
│ │ ├── Slideshow.jsx
│ │ ├── Form.js
│ │ ├── Popup.jsx
│ │ ├── Start.js
│ │ ├── FaceHitAnimation.js
│ │ └── camera-feed.js
│ ├── notfound.js
│ ├── static
│ │ └── fonts
│ │ │ └── FontsFree-Net-SFProDisplay-Regular.ttf
│ ├── creativeCode.css
│ ├── App.test.js
│ ├── css
│ │ └── endButton.css
│ ├── pages
│ │ ├── Greeting.jsx
│ │ ├── GroupIntent.jsx
│ │ ├── endscreen.jsx
│ │ ├── SearchProfile.jsx
│ │ └── ExampleProfileSelection.jsx
│ ├── index.js
│ └── App.js
├── public
│ ├── robots.txt
│ ├── favicon.ico
│ ├── logo192.png
│ ├── logo512.png
│ ├── Barlow-SemiBold.otf
│ ├── favicon
│ │ ├── favicon.ico
│ │ ├── apple-icon.png
│ │ ├── favicon-16x16.png
│ │ ├── favicon-32x32.png
│ │ ├── favicon-96x96.png
│ │ ├── ms-icon-70x70.png
│ │ ├── apple-icon-57x57.png
│ │ ├── apple-icon-60x60.png
│ │ ├── apple-icon-72x72.png
│ │ ├── apple-icon-76x76.png
│ │ ├── ms-icon-144x144.png
│ │ ├── ms-icon-150x150.png
│ │ ├── ms-icon-310x310.png
│ │ ├── android-icon-36x36.png
│ │ ├── android-icon-48x48.png
│ │ ├── android-icon-72x72.png
│ │ ├── android-icon-96x96.png
│ │ ├── apple-icon-114x114.png
│ │ ├── apple-icon-120x120.png
│ │ ├── apple-icon-144x144.png
│ │ ├── apple-icon-152x152.png
│ │ ├── apple-icon-180x180.png
│ │ ├── android-icon-144x144.png
│ │ ├── android-icon-192x192.png
│ │ ├── apple-icon-precomposed.png
│ │ ├── browserconfig.xml
│ │ └── manifest.json
│ ├── manifest.json
│ ├── background.js
│ └── index.html
├── readme.pdf
├── package.json
└── README.md
├── .vscode
└── settings.json
├── twitter
├── scraper
│ ├── twitterscraper
│ │ ├── __init__.py
│ │ ├── utils.py
│ │ ├── insert_seed.py
│ │ ├── posts_scraper.py
│ │ ├── users_scraper.py
│ │ ├── following_scraper.py
│ │ ├── followers_scraper.py
│ │ └── scraper_manager.py
│ ├── img
│ │ └── twitter_scraper-architecture.jpg
│ ├── Dockerfile
│ ├── pyproject.toml
│ └── requirements.txt
├── models
│ └── gorm.go
├── debezium
│ ├── Dockerfile
│ └── register-postgres.json
├── inserter
│ ├── posts
│ │ ├── Dockerfile
│ │ ├── main
│ │ │ └── main.go
│ │ └── inserter.go
│ └── users
│ │ ├── Dockerfile
│ │ ├── main
│ │ └── main.go
│ │ └── inserter.go
└── filter
│ └── user_names
│ ├── Dockerfile
│ ├── main.go
│ └── filter_test.go
├── insta
├── db
│ ├── migrations
│ │ ├── 000006_set_replica_identity_full.down.sql
│ │ ├── 000008_set_replica_identity_full.down.sql
│ │ ├── 000003_create_posts_table.down.sql
│ │ ├── 000001_create_users_table.down.sql
│ │ ├── 000004_create_comments_table.down.sql
│ │ ├── 000013_create_post_likes.down.sql
│ │ ├── 000005_create_post_tagged_users_table.down.sql
│ │ ├── 000009_add_index_to_posts_user_id_ref.down.sql
│ │ ├── 000009_add_index_to_posts_user_id_ref.up.sql
│ │ ├── 000012_add_index_to_post_tagged_users_ref.down.sql
│ │ ├── 000014_add_index_to_post_likes_user_id_ref.down.sql
│ │ ├── 000015_add_index_to_post_likes_post_id_ref.down.sql
│ │ ├── 000010_add_index_to_insta_users_user_name_ref.down.sql
│ │ ├── 000007_add_internal_picture_url_to_posts.down.sql
│ │ ├── 000016_add_index_to_post_tagged_users_post_id_ref.down.sql
│ │ ├── 000007_add_internal_picture_url_to_posts.up.sql
│ │ ├── 000014_add_index_to_post_likes_user_id_ref.up.sql
│ │ ├── 000015_add_index_to_post_likes_post_id_ref.up.sql
│ │ ├── 000010_add_index_to_insta_users_user_name_ref.up.sql
│ │ ├── 000012_add_index_to_post_tagged_users_ref.up.sql
│ │ ├── 000016_add_index_to_post_tagged_users_post_id_ref.up.sql
│ │ ├── 000011_add_index_to_follows_ref.down.sql
│ │ ├── 000011_add_index_to_follows_ref.up.sql
│ │ ├── 000002_create_follows_table.down.sql
│ │ ├── 000006_set_replica_identity_full.up.sql
│ │ ├── 000008_set_replica_identity_full.up.sql
│ │ ├── 000001_create_users_table.up.sql
│ │ ├── 000005_create_post_tagged_users_table.up.sql
│ │ ├── 000013_create_post_likes.up.sql
│ │ ├── 000004_create_comments_table.up.sql
│ │ ├── 000002_create_follows_table.up.sql
│ │ └── 000003_create_posts_table.up.sql
│ ├── Dockerfile
│ ├── kubernetes
│ │ ├── Dockerfile
│ │ ├── kube-register-postgres.json
│ │ └── kube-start-postgres.sh
│ ├── start-postgres.sh
│ └── register-postgres.json
├── models
│ ├── post_download_job.go
│ └── faces.go
├── posts_face-detection
│ ├── worker.Dockerfile
│ └── main
│ │ └── main.go
├── filter
│ ├── post_pictures
│ │ ├── Dockerfile
│ │ ├── main.go
│ │ └── filter_test.go
│ ├── post_face-recon
│ │ ├── Dockerfile
│ │ └── main.go
│ └── user_names
│ │ ├── Dockerfile
│ │ ├── main.go
│ │ └── filter_test.go
├── inserter
│ ├── posts_face
│ │ ├── Dockerfile
│ │ └── main
│ │ │ └── main.go
│ ├── postgres
│ │ ├── Dockerfile
│ │ └── main
│ │ │ └── main.go
│ ├── likes
│ │ ├── Dockerfile
│ │ └── main
│ │ │ └── main.go
│ ├── posts
│ │ ├── Dockerfile
│ │ └── main
│ │ │ └── main.go
│ ├── comments
│ │ ├── Dockerfile
│ │ └── main
│ │ │ └── main.go
│ └── neo4j
│ │ ├── posts
│ │ ├── Dockerfile
│ │ └── main.go
│ │ ├── user
│ │ ├── Dockerfile
│ │ └── main.go
│ │ └── tagged_users
│ │ ├── Dockerfile
│ │ └── main.go
├── pics-downloader
│ ├── Dockerfile
│ └── main
│ │ └── main.go
├── indexer
│ ├── faces
│ │ ├── Dockerfile
│ │ └── insta_faces_indexer.go
│ ├── posts
│ │ ├── Dockerfile
│ │ └── insta_posts_indexer.go
│ ├── users
│ │ ├── Dockerfile
│ │ └── insta_users_indexer.go
│ └── comments
│ │ ├── Dockerfile
│ │ └── insta_comments_indexer.go
└── scraper
│ ├── likes
│ ├── Dockerfile
│ ├── main
│ │ └── main.go
│ └── insta-model.go
│ ├── posts
│ ├── Dockerfile
│ └── main
│ │ └── main.go
│ ├── user
│ ├── Dockerfile
│ └── main
│ │ └── main.go
│ └── comments
│ ├── Dockerfile
│ └── main
│ └── main.go
├── faces
├── .dockerignore
├── Makefile
├── requirements.txt
├── metrics.py
├── proto
│ └── recognizer.proto
├── Dockerfile
├── recognitiontest
│ └── main.go
├── server.py
├── recognizer_pb2_grpc.py
└── recognizer.py
├── docs
├── architecture.png
├── insta_schema.png
└── twitter_schema.png
├── elastic
├── indexer
│ └── model.go
├── indicies.go
├── utils.go
├── models
│ ├── insta.go
│ └── face.go
├── build
│ └── Dockerfile
├── search
│ └── facetest
│ │ └── main.go
└── mapping.go
├── api
├── envoy-proxy
│ ├── Dockerfile
│ └── envoy.yaml
├── grpcserver
│ ├── Dockerfile
│ └── main
│ │ └── main.go
├── README.md
└── proto
│ └── usersearch.proto
├── .gitignore
├── aws_service
├── main
│ └── main.go
├── proto
│ └── renewingAddress.proto
└── Dockerfile
├── .github
├── ISSUE_TEMPLATE
│ └── default.md
└── pull_request_template.md
├── .dockerignore
├── cli
├── Dockerfile
└── main
│ └── main.go
├── face-recognition
├── Dockerfile
└── main
│ └── main.go
├── scraper-client
├── scraper-client.go
├── scraper-config.go
└── simple-scraper-client.go
├── neo4j
├── create-import-user-json
│ ├── Dockerfile
│ └── main
│ │ └── main.go
└── inserter
│ └── inserter.go
├── config
├── postgres-config.go
└── s3-config.go
├── utils
├── neo4j-utils.go
└── utils.go
├── tools
├── Dockerfile
└── .zshrc
├── db
├── utils.go
└── README.md
├── Makefile
├── nlp
└── frequency-analyzer
│ ├── cities.json
│ └── main
│ └── main.go
├── go.mod
├── kafka
└── changestream
│ ├── change_message.go
│ └── filter.go
├── http_header-generator
└── generator.go
├── imgproxy
└── urlbuilder.go
├── worker
├── worker.go
└── builder.go
├── service
└── executor.go
└── CONTRIBUTING.md
/frontend/src/App.css:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {}
2 |
--------------------------------------------------------------------------------
/twitter/scraper/twitterscraper/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/insta/db/migrations/000006_set_replica_identity_full.down.sql:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/insta/db/migrations/000008_set_replica_identity_full.down.sql:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/insta/db/migrations/000003_create_posts_table.down.sql:
--------------------------------------------------------------------------------
1 | DROP TABLE IF EXISTS posts; -- rollback of 000003: removes the posts table; IF EXISTS keeps a repeated run from failing
--------------------------------------------------------------------------------
/insta/db/migrations/000001_create_users_table.down.sql:
--------------------------------------------------------------------------------
1 | DROP TABLE IF EXISTS users; -- rollback of 000001: removes the users table if it exists
2 |
--------------------------------------------------------------------------------
/faces/.dockerignore:
--------------------------------------------------------------------------------
1 | .dockerignore
2 | __pycache__
3 | */__pycache__
4 | env
5 | images
6 |
--------------------------------------------------------------------------------
/frontend/public/robots.txt:
--------------------------------------------------------------------------------
1 | # https://www.robotstxt.org/robotstxt.html
2 | # An empty Disallow value permits crawling of the whole site for every user agent.
3 | User-agent: *
4 | Disallow:
5 |
--------------------------------------------------------------------------------
/insta/db/migrations/000004_create_comments_table.down.sql:
--------------------------------------------------------------------------------
1 | DROP TABLE IF EXISTS comments; -- rollback of 000004: removes the comments table if it exists
2 |
--------------------------------------------------------------------------------
/insta/db/migrations/000013_create_post_likes.down.sql:
--------------------------------------------------------------------------------
1 | DROP TABLE IF EXISTS post_likes; -- rollback of 000013: removes the post_likes table if it exists
2 |
--------------------------------------------------------------------------------
/frontend/readme.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/readme.pdf
--------------------------------------------------------------------------------
/docs/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/docs/architecture.png
--------------------------------------------------------------------------------
/docs/insta_schema.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/docs/insta_schema.png
--------------------------------------------------------------------------------
/docs/twitter_schema.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/docs/twitter_schema.png
--------------------------------------------------------------------------------
/frontend/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon.ico
--------------------------------------------------------------------------------
/frontend/public/logo192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/logo192.png
--------------------------------------------------------------------------------
/frontend/public/logo512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/logo512.png
--------------------------------------------------------------------------------
/insta/db/migrations/000005_create_post_tagged_users_table.down.sql:
--------------------------------------------------------------------------------
1 | DROP TABLE IF EXISTS post_tagged_users; -- rollback of 000005: removes the post_tagged_users table if it exists
2 |
--------------------------------------------------------------------------------
/insta/db/migrations/000009_add_index_to_posts_user_id_ref.down.sql:
--------------------------------------------------------------------------------
1 | DROP INDEX IF EXISTS posts_user_id_index; -- rollback of 000009: removes the index the up step creates on posts (user_id)
2 |
--------------------------------------------------------------------------------
/faces/Makefile:
--------------------------------------------------------------------------------
1 | # Regenerate the Python protobuf and gRPC modules from proto/recognizer.proto into the current directory.
2 | # gen names a task, not a file, so it is declared phony to keep it running even if a file called "gen" appears.
3 | .PHONY: gen
4 | gen:
5 | python -m grpc_tools.protoc -Iproto --python_out=. --grpc_python_out=. proto/recognizer.proto
6 |
--------------------------------------------------------------------------------
/insta/db/migrations/000009_add_index_to_posts_user_id_ref.up.sql:
--------------------------------------------------------------------------------
1 | CREATE INDEX posts_user_id_index ON posts (user_id); -- index on posts.user_id; there is no IF NOT EXISTS, so this fails if the index already exists
2 |
--------------------------------------------------------------------------------
/insta/db/migrations/000012_add_index_to_post_tagged_users_ref.down.sql:
--------------------------------------------------------------------------------
1 | DROP INDEX IF EXISTS post_tagged_users_index; -- rollback of 000012: removes the index the up step creates on post_tagged_users (user_id)
2 |
--------------------------------------------------------------------------------
/insta/db/migrations/000014_add_index_to_post_likes_user_id_ref.down.sql:
--------------------------------------------------------------------------------
1 | DROP INDEX IF EXISTS post_likes_user_id_index; -- rollback of 000014: removes the index the up step creates on post_likes(user_id)
2 |
--------------------------------------------------------------------------------
/insta/db/migrations/000015_add_index_to_post_likes_post_id_ref.down.sql:
--------------------------------------------------------------------------------
1 | DROP INDEX IF EXISTS post_likes_post_id_index; -- rollback of 000015: removes the index the up step creates on post_likes(post_id)
2 |
--------------------------------------------------------------------------------
/frontend/src/components/img/fb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/src/components/img/fb.png
--------------------------------------------------------------------------------
/frontend/src/notfound.js:
--------------------------------------------------------------------------------
1 | import React from 'react'
2 | const Notfound = () =>
Not found
3 | export default Notfound
4 |
--------------------------------------------------------------------------------
/insta/db/migrations/000010_add_index_to_insta_users_user_name_ref.down.sql:
--------------------------------------------------------------------------------
1 | DROP INDEX IF EXISTS insta_users_user_name_index; -- rollback of 000010: removes the index the up step creates on users (user_name)
2 |
--------------------------------------------------------------------------------
/frontend/public/Barlow-SemiBold.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/Barlow-SemiBold.otf
--------------------------------------------------------------------------------
/frontend/public/favicon/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/favicon.ico
--------------------------------------------------------------------------------
/insta/db/migrations/000007_add_internal_picture_url_to_posts.down.sql:
--------------------------------------------------------------------------------
1 | ALTER TABLE posts
2 | DROP COLUMN internal_picture_url; -- rollback of 000007: removes the column together with any values stored in it
3 |
--------------------------------------------------------------------------------
/insta/db/migrations/000016_add_index_to_post_tagged_users_post_id_ref.down.sql:
--------------------------------------------------------------------------------
1 | DROP INDEX IF EXISTS tagged_posts_post_id_index; -- rollback of 000016: removes the index the up step creates on post_tagged_users (post_id)
2 |
--------------------------------------------------------------------------------
/elastic/indexer/model.go:
--------------------------------------------------------------------------------
1 | package indexer
2 |
3 | type BulkIndexDoc struct { // BulkIndexDoc pairs a document ID with a bulk operation, both held as strings.
4 | DocumentId string // NOTE(review): Go naming convention would spell this DocumentID; renaming is an API change for callers
5 | BulkOperation string // presumably the serialized operation sent in a bulk request — confirm against callers
6 | }
7 |
--------------------------------------------------------------------------------
/frontend/public/favicon/apple-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon.png
--------------------------------------------------------------------------------
/frontend/src/components/img/twitter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/src/components/img/twitter.png
--------------------------------------------------------------------------------
/insta/db/migrations/000007_add_internal_picture_url_to_posts.up.sql:
--------------------------------------------------------------------------------
1 | ALTER TABLE posts
2 | ADD COLUMN internal_picture_url VARCHAR; -- nullable with no default, so existing rows get NULL
3 |
--------------------------------------------------------------------------------
/insta/db/migrations/000014_add_index_to_post_likes_user_id_ref.up.sql:
--------------------------------------------------------------------------------
1 | CREATE INDEX post_likes_user_id_index ON post_likes(user_id); -- index on post_likes.user_id; fails if the index already exists (no IF NOT EXISTS)
2 |
--------------------------------------------------------------------------------
/insta/db/migrations/000015_add_index_to_post_likes_post_id_ref.up.sql:
--------------------------------------------------------------------------------
1 | CREATE INDEX post_likes_post_id_index ON post_likes(post_id); -- index on post_likes.post_id; fails if the index already exists (no IF NOT EXISTS)
2 |
--------------------------------------------------------------------------------
/frontend/public/favicon/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/favicon-16x16.png
--------------------------------------------------------------------------------
/frontend/public/favicon/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/favicon-32x32.png
--------------------------------------------------------------------------------
/frontend/public/favicon/favicon-96x96.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/favicon-96x96.png
--------------------------------------------------------------------------------
/frontend/public/favicon/ms-icon-70x70.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/ms-icon-70x70.png
--------------------------------------------------------------------------------
/frontend/src/components/img/instagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/src/components/img/instagram.png
--------------------------------------------------------------------------------
/frontend/src/components/img/linkedin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/src/components/img/linkedin.png
--------------------------------------------------------------------------------
/insta/db/migrations/000010_add_index_to_insta_users_user_name_ref.up.sql:
--------------------------------------------------------------------------------
1 | CREATE INDEX insta_users_user_name_index ON users (user_name); -- NOTE(review): 000001 declares user_name UNIQUE, which already gives PostgreSQL a unique index on this column, so this index looks redundant — confirm before relying on it
2 |
--------------------------------------------------------------------------------
/insta/db/migrations/000012_add_index_to_post_tagged_users_ref.up.sql:
--------------------------------------------------------------------------------
1 | CREATE INDEX post_tagged_users_index ON post_tagged_users (user_id); -- despite the generic name, this index covers only the user_id column
2 |
--------------------------------------------------------------------------------
/frontend/public/favicon/apple-icon-57x57.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-57x57.png
--------------------------------------------------------------------------------
/frontend/public/favicon/apple-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-60x60.png
--------------------------------------------------------------------------------
/frontend/public/favicon/apple-icon-72x72.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-72x72.png
--------------------------------------------------------------------------------
/frontend/public/favicon/apple-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-76x76.png
--------------------------------------------------------------------------------
/frontend/public/favicon/ms-icon-144x144.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/ms-icon-144x144.png
--------------------------------------------------------------------------------
/frontend/public/favicon/ms-icon-150x150.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/ms-icon-150x150.png
--------------------------------------------------------------------------------
/frontend/public/favicon/ms-icon-310x310.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/ms-icon-310x310.png
--------------------------------------------------------------------------------
/frontend/public/favicon/android-icon-36x36.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/android-icon-36x36.png
--------------------------------------------------------------------------------
/frontend/public/favicon/android-icon-48x48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/android-icon-48x48.png
--------------------------------------------------------------------------------
/frontend/public/favicon/android-icon-72x72.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/android-icon-72x72.png
--------------------------------------------------------------------------------
/frontend/public/favicon/android-icon-96x96.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/android-icon-96x96.png
--------------------------------------------------------------------------------
/frontend/public/favicon/apple-icon-114x114.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-114x114.png
--------------------------------------------------------------------------------
/frontend/public/favicon/apple-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-120x120.png
--------------------------------------------------------------------------------
/frontend/public/favicon/apple-icon-144x144.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-144x144.png
--------------------------------------------------------------------------------
/frontend/public/favicon/apple-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-152x152.png
--------------------------------------------------------------------------------
/frontend/public/favicon/apple-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-180x180.png
--------------------------------------------------------------------------------
/insta/db/migrations/000016_add_index_to_post_tagged_users_post_id_ref.up.sql:
--------------------------------------------------------------------------------
1 | CREATE INDEX tagged_posts_post_id_index ON post_tagged_users (post_id); -- NOTE(review): the primary key (post_id, user_id) from 000005 already leads with post_id, so this index may be redundant — confirm
2 |
--------------------------------------------------------------------------------
/api/envoy-proxy/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM envoyproxy/envoy-dev:4e858f17fe08224c9c089240908ccd0c518e01a7
2 | COPY api/envoy-proxy/envoy.yaml /etc/envoy/envoy.yaml
3 | # NOTE: the COPY source path is relative to the repository root, so build with the repo root as the build context.
--------------------------------------------------------------------------------
/frontend/public/favicon/android-icon-144x144.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/android-icon-144x144.png
--------------------------------------------------------------------------------
/frontend/public/favicon/android-icon-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/android-icon-192x192.png
--------------------------------------------------------------------------------
/insta/db/migrations/000011_add_index_to_follows_ref.down.sql:
--------------------------------------------------------------------------------
1 | DROP INDEX IF EXISTS folows_from_id_index; -- "folows" (sic): the name must match the spelling used by the up migration
2 | DROP INDEX IF EXISTS folows_to_id_index; -- same spelling caveat as the line above
3 |
4 |
--------------------------------------------------------------------------------
/frontend/public/favicon/apple-icon-precomposed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/public/favicon/apple-icon-precomposed.png
--------------------------------------------------------------------------------
/twitter/scraper/img/twitter_scraper-architecture.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/twitter/scraper/img/twitter_scraper-architecture.jpg
--------------------------------------------------------------------------------
/insta/db/migrations/000011_add_index_to_follows_ref.up.sql:
--------------------------------------------------------------------------------
1 | CREATE INDEX folows_from_id_index ON follows (from_id); -- "folows" (sic): the down migration drops the index under this exact name, so keep both in sync
2 | CREATE INDEX folows_to_id_index ON follows (to_id); -- same spelling caveat as the line above
3 |
--------------------------------------------------------------------------------
/frontend/src/static/fonts/FontsFree-Net-SFProDisplay-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeuniversity/smag-mvp/HEAD/frontend/src/static/fonts/FontsFree-Net-SFProDisplay-Regular.ttf
--------------------------------------------------------------------------------
/elastic/indicies.go:
--------------------------------------------------------------------------------
1 | package elastic
2 |
3 | const ( // Index name constants exported by package elastic.
4 | CommentsIndex = "insta_comments"
5 | FacesIndex = "faces" // the only name here without the "insta_" prefix
6 | PostsIndex = "insta_posts"
7 | UsersIndex = "insta_users"
8 | )
9 |
--------------------------------------------------------------------------------
/insta/db/migrations/000002_create_follows_table.down.sql:
--------------------------------------------------------------------------------
1 | DROP INDEX IF EXISTS follows_from_id_fkey; -- NOTE(review): a "_fkey" name usually denotes a foreign-key constraint, not an index; confirm against the up migration (not visible here)
2 | DROP INDEX IF EXISTS follows_to_id_fkey; -- same caveat as the line above
3 | DROP INDEX IF EXISTS follows_uniq_relationship_index;
4 | DROP TABLE IF EXISTS follows; -- dropping the table also removes any indexes still attached to it
5 |
--------------------------------------------------------------------------------
/insta/db/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM migrate/migrate:v4.6.2
2 | # bash is needed by the ENTRYPOINT below; what curl is used for is not visible in this file.
3 | RUN apk add --no-cache --upgrade \
4 | bash \
5 | curl
6 | # The COPY source path is relative to the repository root, so build with the repo root as the build context.
7 | WORKDIR /src
8 | COPY insta/db/ db/
9 | # Runs "bash db/start-postgres.sh" by default; arguments passed to `docker run` replace CMD only, not ENTRYPOINT.
10 | ENTRYPOINT [ "bash" ]
11 | CMD [ "db/start-postgres.sh" ]
12 |
--------------------------------------------------------------------------------
/insta/db/migrations/000006_set_replica_identity_full.up.sql:
--------------------------------------------------------------------------------
1 | ALTER TABLE posts REPLICA IDENTITY FULL; -- FULL makes the write-ahead log record the complete old row on UPDATE/DELETE; presumably required by the change-data-capture setup — confirm
2 | ALTER TABLE users REPLICA IDENTITY FULL;
3 | ALTER TABLE follows REPLICA IDENTITY FULL;
4 | ALTER TABLE comments REPLICA IDENTITY FULL; -- NOTE: the matching down migration is empty, so rolling back leaves these settings in place
5 |
--------------------------------------------------------------------------------
/insta/db/migrations/000008_set_replica_identity_full.up.sql:
--------------------------------------------------------------------------------
1 | ALTER TABLE posts REPLICA IDENTITY FULL; -- NOTE(review): these four statements are identical to 000006_set_replica_identity_full.up.sql; repeating them changes nothing further
2 | ALTER TABLE users REPLICA IDENTITY FULL;
3 | ALTER TABLE follows REPLICA IDENTITY FULL;
4 | ALTER TABLE comments REPLICA IDENTITY FULL; -- NOTE: the matching down migration is empty, so rolling back leaves these settings in place
5 |
--------------------------------------------------------------------------------
/insta/models/post_download_job.go:
--------------------------------------------------------------------------------
1 | package models
2 |
3 | // PostDownloadJob represents a post which should be downloaded, identified by its ID and picture URL.
4 | type PostDownloadJob struct {
5 | PostID int `json:"post_id"` // encoded in JSON under the key "post_id"
6 | PictureURL string `json:"picture_url"` // encoded in JSON under the key "picture_url"; presumably the source URL to fetch — confirm against the consumer
7 | }
8 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # IDE
2 | .idea/
3 | .vscode
4 |
5 | # Python
6 | __pycache__/
7 | *.pyc
8 |
9 | # macOS
10 | .DS_Store
11 |
12 | # Frontend
13 | node_modules
14 | frontend/yarn.lock
15 | frontend/build
16 |
17 | # Local environments and faces/ artifacts
18 | env
19 | faces/images
20 | faces/img.jpg
21 |
--------------------------------------------------------------------------------
/frontend/src/components/H1.js:
--------------------------------------------------------------------------------
1 | import React, { Component } from "react";
2 |
3 | function H1(props) {
4 | return (
5 |
6 |
{props.children}
7 |
8 | );
9 | }
10 |
11 | export default H1;
12 |
--------------------------------------------------------------------------------
/insta/db/migrations/000001_create_users_table.up.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE IF NOT EXISTS users( -- IF NOT EXISTS makes a repeated run a no-op
2 | id serial PRIMARY KEY, -- auto-incrementing surrogate key
3 | user_name VARCHAR UNIQUE NOT NULL, -- required and unique across all rows
4 | real_name VARCHAR,
5 | avatar_url VARCHAR,
6 | bio text,
7 | crawl_ts integer -- presumably a timestamp of the last crawl; unit and epoch are not visible here — confirm
8 | );
9 |
--------------------------------------------------------------------------------
/frontend/src/components/H2.js:
--------------------------------------------------------------------------------
1 | import React, { Component } from "react";
2 |
3 | function H2(props) {
4 | return (
5 |
6 |
{props.children}
7 |
8 | );
9 | }
10 |
11 | export default H2;
12 |
--------------------------------------------------------------------------------
/insta/db/migrations/000005_create_post_tagged_users_table.up.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE IF NOT EXISTS post_tagged_users
2 | (
3 | id serial,
4 | post_id INTEGER REFERENCES posts (id),
5 | user_id INTEGER REFERENCES users (id),
6 | PRIMARY KEY (post_id, user_id)
7 | );
8 |
--------------------------------------------------------------------------------
/insta/db/migrations/000013_create_post_likes.up.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE IF NOT EXISTS post_likes
2 | (
3 | id serial PRIMARY KEY,
4 | like_id VARCHAR UNIQUE NOT NULL,
5 | user_id INTEGER REFERENCES users (id),
6 | post_id INTEGER REFERENCES posts(id)
7 | );
8 |
--------------------------------------------------------------------------------
/twitter/models/gorm.go:
--------------------------------------------------------------------------------
1 | package models
2 |
3 | import "time"
4 |
5 | // GormModelWithoutID is an alternative definition for gorm.Model without an ID; it declares only the three timestamp fields below.
6 | type GormModelWithoutID struct {
7 | CreatedAt time.Time // creation timestamp (presumably maintained by gorm — confirm)
8 | UpdatedAt time.Time // last-update timestamp (presumably maintained by gorm — confirm)
9 | DeletedAt *time.Time // pointer so that "no deletion time" can be nil; presumably gorm's soft-delete marker — confirm
10 | }
11 |
--------------------------------------------------------------------------------
/frontend/src/creativeCode.css:
--------------------------------------------------------------------------------
1 | .white-background {
2 | position: fixed;
3 | z-index: 1;
4 | top: 0;
5 | left: 0;
6 | right: 0;
7 | bottom: 0;
8 | background: white;
9 | }
10 |
11 | .container {
12 | position: relative;
13 | z-index: 2;
14 | }
--------------------------------------------------------------------------------
/insta/db/migrations/000004_create_comments_table.up.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE IF NOT EXISTS comments(
2 | id serial PRIMARY KEY,
3 | post_id INTEGER REFERENCES posts(id),
4 | comment_id VARCHAR UNIQUE NOT NULL,
5 | comment_text text,
6 | owner_user_id INTEGER REFERENCES users(id)
7 | );
8 |
--------------------------------------------------------------------------------
/insta/db/migrations/000002_create_follows_table.up.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE IF NOT EXISTS follows( -- relationship rows between two users
2 |     id serial PRIMARY KEY,
3 |     from_id INTEGER REFERENCES users(id),
4 |     to_id INTEGER REFERENCES users(id)
5 | );
6 |
7 | CREATE UNIQUE INDEX IF NOT EXISTS follows_uniq_relationship_index ON follows (from_id, to_id); -- each (from_id, to_id) pair may be stored only once; IF NOT EXISTS keeps this statement as re-runnable as the table creation
8 |
--------------------------------------------------------------------------------
/frontend/src/App.test.js:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import ReactDOM from 'react-dom';
3 | import App from './App';
4 |
5 | it('renders without crashing', () => {
6 | const div = document.createElement('div');
7 | ReactDOM.render(, div);
8 | ReactDOM.unmountComponentAtNode(div);
9 | });
10 |
--------------------------------------------------------------------------------
/frontend/src/components/FaceHitAnimation.css:
--------------------------------------------------------------------------------
1 | .BackgroundImage {
2 | position: fixed;
3 | width: 600px;
4 | }
5 |
6 | .BackgroundImage-center {
7 | width: 800px;
8 | height: 800px;
9 | position: fixed;
10 | top: 50%;
11 | left: 50%;
12 | margin-left: -400px;
13 | margin-top: -400px;
14 | }
15 |
--------------------------------------------------------------------------------
/insta/db/migrations/000003_create_posts_table.up.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE IF NOT EXISTS posts
2 | (
3 | id serial PRIMARY KEY,
4 | user_id INTEGER REFERENCES users (id),
5 | post_id VARCHAR UNIQUE NOT NULL,
6 | short_code VARCHAR,
7 | picture_url VARCHAR,
8 | caption VARCHAR
9 | );
10 |
--------------------------------------------------------------------------------
/frontend/public/favicon/browserconfig.xml:
--------------------------------------------------------------------------------
1 |
2 | #ffffff
--------------------------------------------------------------------------------
/aws_service/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "github.com/codeuniversity/smag-mvp/aws_service"
5 | "github.com/codeuniversity/smag-mvp/utils"
6 | )
7 |
8 | // main builds the aws_service with the port obtained from utils.GetStringFromEnvWithDefault("GRPC_PORT", "9900") and calls Listen on it.
9 | func main() {
10 | 	port := utils.GetStringFromEnvWithDefault("GRPC_PORT", "9900")
11 | 	aws_service.New(port).Listen()
12 | }
13 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/default.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Default
3 | about: Shortcut for writing a new issue
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | __Description__
11 |
12 | ...
13 |
14 | __Checklist__
15 |
16 | - [ ] ...
17 |
18 | __Attachments__
19 |
20 | - [Title](URL)
21 | - 
22 |
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | # IDE stuff
2 | .vscode/
3 | .idea/
4 |
5 | # git stuff
6 | .gitignore
7 | *.md
8 |
9 | # DOCKER stuff
10 | docker-compose.yml
11 | */Dockerfile
12 | Dockerfile
13 | *.Dockerfile
14 | .dockerignore
15 |
16 | # PYTHON stuff
17 | __pycache__/
18 | *.pyc
19 | *faces/env/*
20 | *faces/images/*
21 | frontend
22 | Makefile
23 |
--------------------------------------------------------------------------------
/insta/posts_face-detection/worker.Dockerfile:
--------------------------------------------------------------------------------
1 | FROM codesmag/opencv AS builder
2 | WORKDIR /app
3 | COPY go.mod go.sum ./
4 | RUN go mod download
5 | COPY . .
6 | RUN GOOS=linux go build -o worker.bin insta/posts_face-detection/main/main.go
7 | COPY insta/posts_face-detection/haarcascade_frontalface_alt.xml .
8 |
9 | CMD ["./worker.bin"]
10 |
--------------------------------------------------------------------------------
/twitter/debezium/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:18.04
2 |
3 | RUN apt-get update && apt-get install -y \
4 | curl
5 |
6 | WORKDIR /src
7 | COPY register-postgres.json .
8 |
9 | SHELL [ "bash" ]
10 | CMD [ "curl", "-i", "-X", "POST", "-H", "Accept:application/json", "-H", "Content-Type:application/json", "http://connect:8083/connectors/", "-d", "@register-postgres.json" ]
11 |
--------------------------------------------------------------------------------
/faces/requirements.txt:
--------------------------------------------------------------------------------
1 | certifi==2019.9.11
2 | chardet==3.0.4
3 | Click==7.0
4 | dlib==19.9.0
5 | face-recognition==1.2.3
6 | face-recognition-models==0.3.0
7 | grpcio==1.18.0
8 | grpcio-tools==1.18.0
9 | idna==2.8
10 | numpy==1.17.4
11 | Pillow==6.2.1
12 | prometheus-client==0.7.1
13 | protobuf==3.10.0
14 | pycodestyle==2.5.0
15 | requests==2.22.0
16 | six==1.13.0
17 | urllib3==1.25.7
18 |
--------------------------------------------------------------------------------
/insta/filter/post_pictures/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.13 as builder
2 | WORKDIR /app
3 | COPY go.mod go.sum ./
4 | RUN go mod download
5 | COPY . .
6 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o filter insta/filter/post_pictures/main.go
7 |
8 | FROM alpine
9 | RUN apk --no-cache add ca-certificates
10 | WORKDIR /app
11 | COPY --from=builder /app/filter .
12 | CMD ["./filter"]
13 |
--------------------------------------------------------------------------------
/insta/filter/post_face-recon/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.13 as builder
2 | WORKDIR /app
3 | COPY go.mod go.sum ./
4 | RUN go mod download
5 | COPY . .
6 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o filter insta/filter/post_face-recon/main.go
7 |
8 | FROM alpine
9 | RUN apk --no-cache add ca-certificates
10 | WORKDIR /app
11 | COPY --from=builder /app/filter .
12 | CMD ["./filter"]
13 |
--------------------------------------------------------------------------------
/insta/inserter/posts_face/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.13 as builder
2 | WORKDIR /app
3 | COPY go.mod go.sum ./
4 | RUN go mod download
5 | COPY . .
6 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o inserter insta/inserter/posts_face/main/main.go
7 |
8 | FROM alpine
9 | RUN apk --no-cache add ca-certificates
10 | WORKDIR /app
11 | COPY --from=builder /app/inserter .
12 | CMD ["./inserter"]
13 |
--------------------------------------------------------------------------------
/insta/pics-downloader/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.13 as builder
2 | WORKDIR /app
3 | COPY go.mod go.sum ./
4 | RUN go mod download
5 | COPY . .
6 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o downloader insta/pics-downloader/main/main.go
7 |
8 | FROM alpine
9 | RUN apk --no-cache add ca-certificates
10 | WORKDIR /app
11 | COPY --from=builder /app/downloader .
12 | CMD ["./downloader"]
13 |
--------------------------------------------------------------------------------
/cli/Dockerfile:
--------------------------------------------------------------------------------
1 | # Build stage: compile the smag-cli binary with cgo disabled.
2 | FROM golang:1.13 as builder
3 | WORKDIR /app
4 | # Copy the module files first so the dependency download gets its own layer.
5 | COPY go.mod go.sum ./
6 | RUN go mod download
7 | COPY . .
8 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o smag-cli cli/main/main.go
9 |
10 | # Runtime stage: alpine image with CA certificates and the compiled binary only.
11 | FROM alpine
12 | RUN apk --no-cache add ca-certificates
13 | WORKDIR /app
14 | COPY --from=builder /app/smag-cli .
15 | ENTRYPOINT ["./smag-cli"]
16 | # Optional explicit statement. This comment must stay on its own line: Docker
17 | # only treats '#' as a comment at the start of a line, so a trailing comment
18 | # would turn the CMD below into shell form and pass it to the entrypoint.
19 | CMD [ "" ]
20 |
--------------------------------------------------------------------------------
/faces/metrics.py:
--------------------------------------------------------------------------------
1 | from prometheus_client import Counter, Histogram
2 |
3 | request_counter = Counter(
4 | 'request_count', 'times an endpoint was called', labelnames=['endpoint'])
5 |
6 | request_latency_histogram = Histogram('request_latency_seconds', 'the time it takes for an endpoint to respond',
7 | labelnames=['endpoint'], buckets=(.01, .05, .1, .5, 1.0, 2.0, 4.0, 8.0, 10.0))
8 |
--------------------------------------------------------------------------------
/face-recognition/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.13 as builder
2 | WORKDIR /app
3 | COPY go.mod .
4 | COPY go.sum .
5 | RUN go mod download
6 | COPY . .
7 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o worker.bin face-recognition/main/main.go
8 |
9 | FROM alpine
10 | RUN apk --no-cache add ca-certificates
11 | RUN mkdir /app
12 | COPY --from=builder /app/worker.bin /app
13 | WORKDIR /app
14 | CMD ["./worker.bin"]
15 |
--------------------------------------------------------------------------------
/frontend/src/components/InterestFooter.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import PropTypes from "prop-types";
3 |
4 | const InterestFooter = props => {
5 | return (
6 |
7 |
{props.title}
8 |
{props.details}
9 |
10 | );
11 | };
12 |
13 | InterestFooter.propTypes = {
14 | title: PropTypes.string
15 | };
16 |
17 | export default InterestFooter;
18 |
--------------------------------------------------------------------------------
/insta/db/kubernetes/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM alpine
2 | RUN apk add --no-cache curl postgresql-client tar bash jq
3 | RUN wget https://github.com/golang-migrate/migrate/releases/download/v4.6.2/migrate.linux-amd64.tar.gz && tar -xf migrate.linux-amd64.tar.gz
4 | RUN mv migrate.linux-amd64 usr/bin/migrate
5 | WORKDIR /script
6 | COPY insta/db/migrations db/migrations
7 | COPY insta/db/kubernetes .
8 | ENTRYPOINT ["bash", "kube-start-postgres.sh"]
9 |
--------------------------------------------------------------------------------
/api/grpcserver/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.13 as builder
2 | WORKDIR /app
3 | COPY go.mod go.sum ./
4 | RUN go mod download
5 | COPY . .
6 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o grpc_server api/grpcserver/main/main.go
7 |
8 | FROM alpine
9 | RUN apk --no-cache add ca-certificates
10 | WORKDIR /app
11 | COPY nlp/frequency-analyzer/cities.json .
12 | COPY --from=builder /app/grpc_server .
13 | CMD ["./grpc_server"]
14 |
--------------------------------------------------------------------------------
/aws_service/proto/renewingAddress.proto:
--------------------------------------------------------------------------------
1 | syntax = "proto3";
2 |
3 | package proto;
4 |
5 | service ElasticIpService {
6 | rpc renewElasticIp(RenewingElasticIp) returns (RenewedElasticResult) {}
7 | }
8 |
9 | message RenewedElasticResult {
10 | string elasticIp = 1;
11 | }
12 |
13 | message RenewingElasticIp {
14 | string instanceId = 1;
15 | string node = 2;
16 | string pod = 3;
17 | string pod_ip = 4;
18 | }
19 |
--------------------------------------------------------------------------------
/frontend/src/components/BackButton.js:
--------------------------------------------------------------------------------
1 | import React, { Component, useState } from "react";
2 | import { withRouter, history } from "react-router";
3 | import backicon from "./img/chevron-left-solid.svg";
4 |
5 | const BackButton = () => (
6 |
11 | );
12 |
13 | export default BackButton;
14 |
--------------------------------------------------------------------------------
/frontend/src/components/PostsCard.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import PropTypes from "prop-types";
3 | import Slideshow from "./Slideshow";
4 |
5 | const PostsCard = props => {
6 | return (
7 |
8 |
9 |
10 | );
11 | };
12 |
13 | PostsCard.propTypes = {
14 | slides: PropTypes.array
15 | };
16 |
17 | export default PostsCard;
18 |
--------------------------------------------------------------------------------
/frontend/src/css/endButton.css:
--------------------------------------------------------------------------------
1 | .endButton {
2 | height: 50px;
3 | width: 50px;
4 | background-color: #fafafa;
5 | border-radius: 25px;
6 | position: fixed;
7 | z-index: 2;
8 | top: 20px;
9 | right: 20px;
10 | }
11 |
12 | .endButton a {
13 | width: 50px;
14 | height: 50px;
15 | display: flex;
16 | justify-content: center;
17 | align-items: center;
18 | }
19 |
20 | .endButton a svg {
21 | color: black;
22 | }
23 |
--------------------------------------------------------------------------------
/insta/db/start-postgres.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | echo "# MIGRATE DATABASE"
4 | /migrate -database "postgres://postgres:12345678@postgres:5432/${POSTGRES_DB}?sslmode=disable" -path debezium/migrations up
5 |
6 | pwd
7 |
8 | ls -a
9 |
10 | echo "# PREPARE DEBEZIUM"
11 | curl -i -X POST -H "Accept:application/json" \
12 | -H "Content-Type:application/json" \
13 | http://connect:8083/connectors/ \
14 | -d @db/register-postgres.json
15 |
--------------------------------------------------------------------------------
/insta/db/register-postgres.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "inventory-connector",
3 | "config": {
4 | "connector.class": "io.debezium.connector.postgresql.PostgresConnector",
5 | "tasks.max": "1",
6 | "database.hostname": "postgres",
7 | "database.port": "5432",
8 | "database.user": "postgres",
9 | "database.password": "12345678",
10 | "database.dbname": "instascraper",
11 | "database.server.name": "postgres"
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/twitter/scraper/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.7-slim
2 |
3 | WORKDIR /src
4 |
5 | COPY requirements.txt .
6 | RUN pip install -r requirements.txt
7 |
8 | COPY ./twitterscraper ./twitterscraper
9 |
10 | ENTRYPOINT [ "python" ]
11 | CMD [ "-c", "raise Exception('Please set the CMD to either `-m twitterscraper.posts_scraper.py`, `-m twitterscraper.users_scraper.py`, `-m twitterscraper.follwers_scraper.py` or `-m twitterscraper.follwing_scraper.py`')" ]
12 |
--------------------------------------------------------------------------------
/elastic/utils.go:
--------------------------------------------------------------------------------
1 | package elastic
2 |
3 | import (
4 | "github.com/elastic/go-elasticsearch/v7"
5 | )
6 |
7 | // InitializeElasticSearch creates an Elasticsearch client that talks to the
8 | // addresses in esHosts. It panics if the client cannot be constructed.
9 | func InitializeElasticSearch(esHosts []string) *elasticsearch.Client {
10 | 	client, err := elasticsearch.NewClient(elasticsearch.Config{
11 | 		Addresses: esHosts,
12 | 	})
13 | 	if err != nil {
14 | 		panic(err)
15 | 	}
16 |
17 | 	return client
18 | }
19 |
--------------------------------------------------------------------------------
/twitter/debezium/register-postgres.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "inventory-connector",
3 | "config": {
4 | "connector.class": "io.debezium.connector.postgresql.PostgresConnector",
5 | "tasks.max": "1",
6 | "database.hostname": "postgres",
7 | "database.port": "5432",
8 | "database.user": "postgres",
9 | "database.password": "12345678",
10 | "database.dbname": "smag-twitter",
11 | "database.server.name": "postgres"
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/frontend/src/components/Button.jsx:
--------------------------------------------------------------------------------
1 | import React, { Component } from "react";
2 |
3 | function Button(props) {
4 | if (props.buttonlink) {
5 | return (
6 |
9 | );
10 | }
11 |
12 | return (
13 |
14 | {props.children}
15 |
16 | );
17 | }
18 |
19 | export default Button;
20 |
--------------------------------------------------------------------------------
/insta/indexer/faces/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.13 as builder
2 | WORKDIR /app
3 | COPY go.mod go.sum ./
4 | RUN go mod download
5 | COPY . .
6 | RUN CGO_ENABLED=0 GOOS=linux go build \
7 | -installsuffix cgo \
8 | -o insta_faces_indexer \
9 | insta/indexer/faces/insta_faces_indexer.go
10 |
11 | FROM alpine
12 | RUN apk --no-cache add ca-certificates
13 | WORKDIR /app
14 | COPY --from=builder /app/insta_faces_indexer .
15 | CMD ["./insta_faces_indexer"]
16 |
--------------------------------------------------------------------------------
/insta/indexer/posts/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.13 as builder
2 | WORKDIR /app
3 | COPY go.mod go.sum ./
4 | RUN go mod download
5 | COPY . .
6 | RUN CGO_ENABLED=0 GOOS=linux go build \
7 | -installsuffix cgo \
8 | -o insta_posts_indexer \
9 | insta/indexer/posts/insta_posts_indexer.go
10 |
11 | FROM alpine
12 | RUN apk --no-cache add ca-certificates
13 | WORKDIR /app
14 | COPY --from=builder /app/insta_posts_indexer .
15 | CMD ["./insta_posts_indexer"]
16 |
--------------------------------------------------------------------------------
/insta/indexer/users/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.13 as builder
2 | WORKDIR /app
3 | COPY go.mod go.sum ./
4 | RUN go mod download
5 | COPY . .
6 | RUN CGO_ENABLED=0 GOOS=linux go build \
7 | -installsuffix cgo \
8 | -o insta_users_indexer \
9 | insta/indexer/users/insta_users_indexer.go
10 |
11 | FROM alpine
12 | RUN apk --no-cache add ca-certificates
13 | WORKDIR /app
14 | COPY --from=builder /app/insta_users_indexer .
15 | CMD ["./insta_users_indexer"]
16 |
--------------------------------------------------------------------------------
/aws_service/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.13 as builder
2 | WORKDIR /app
3 | COPY go.mod .
4 | COPY go.sum .
5 | RUN go mod download
6 | COPY . .
7 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o service.bin aws_service/main/main.go
8 |
9 | FROM alpine
10 | RUN apk --no-cache add ca-certificates
11 | RUN mkdir /app
12 | COPY http_header-generator/useragents.json /app
13 | COPY --from=builder /app/service.bin /app
14 | WORKDIR /app
15 | CMD ["./service.bin"]
16 |
--------------------------------------------------------------------------------
/twitter/inserter/posts/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.13 as builder
2 | WORKDIR /app
3 | COPY go.mod go.sum ./
4 | RUN go mod download
5 | COPY . .
6 | RUN CGO_ENABLED=0 GOOS=linux go build \
7 | -installsuffix cgo \
8 | -o twitter_inserter_posts \
9 | twitter/inserter/posts/main/main.go
10 |
11 | FROM alpine
12 | RUN apk --no-cache add ca-certificates
13 | WORKDIR /app
14 | COPY --from=builder /app/twitter_inserter_posts .
15 | CMD ["./twitter_inserter_posts"]
16 |
--------------------------------------------------------------------------------
/twitter/inserter/users/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.13 as builder
2 | WORKDIR /app
3 | COPY go.mod go.sum ./
4 | RUN go mod download
5 | COPY . .
6 | RUN CGO_ENABLED=0 GOOS=linux go build \
7 | -installsuffix cgo \
8 | -o twitter_inserter_users \
9 | twitter/inserter/users/main/main.go
10 |
11 | FROM alpine
12 | RUN apk --no-cache add ca-certificates
13 | WORKDIR /app
14 | COPY --from=builder /app/twitter_inserter_users .
15 | CMD ["./twitter_inserter_users"]
16 |
--------------------------------------------------------------------------------
/twitter/scraper/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "twitter_scraper"
3 | version = "0.1.0"
4 | description = ""
5 | authors = ["Urhengulas "]
6 |
7 | [tool.poetry.dependencies]
8 | python = "^3.7"
9 | twint = "^2.1.2"
10 | kafka-python = "^1.4.7"
11 |
12 | [tool.poetry.dev-dependencies]
13 | pylint = "^2.4.2"
14 | autopep8 = "^1.4.4"
15 |
16 | [build-system]
17 | requires = ["poetry>=0.12"]
18 | build-backend = "poetry.masonry.api"
19 |
--------------------------------------------------------------------------------
/frontend/src/components/StatsCard.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import PropTypes from "prop-types";
3 |
4 | const StatsCard = props => {
5 | return (
6 |
7 |
We were able to reuse
8 |
{props.count}
9 |
snippets of your data.
10 |
11 | );
12 | };
13 |
14 | StatsCard.propTypes = {
15 | count: PropTypes.number
16 | };
17 |
18 | export default StatsCard;
19 |
--------------------------------------------------------------------------------
/twitter/filter/user_names/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.13 as builder
2 | WORKDIR /app
3 | COPY go.mod go.sum ./
4 | RUN go mod download
5 | COPY . .
6 | RUN CGO_ENABLED=0 GOOS=linux go build \
7 | -installsuffix cgo \
8 | -o twitter_filter_user_names \
9 | twitter/filter/user_names/main.go
10 |
11 | FROM alpine
12 | RUN apk --no-cache add ca-certificates
13 | WORKDIR /app
14 | COPY --from=builder /app/twitter_filter_user_names .
15 | CMD ["./twitter_filter_user_names"]
16 |
--------------------------------------------------------------------------------
/insta/filter/user_names/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.13 as builder
2 | WORKDIR /app
3 | COPY go.mod .
4 | COPY go.sum .
5 | RUN go mod download
6 | COPY . .
7 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o kafka_changestream insta/filter/user_names/main.go
8 |
9 | FROM alpine
10 | RUN apk --no-cache add ca-certificates
11 | WORKDIR /app
12 | COPY http_header-generator/useragents.json .
13 | COPY --from=builder /app/kafka_changestream .
14 | CMD ["./kafka_changestream"]
15 |
--------------------------------------------------------------------------------
/insta/indexer/comments/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.13 as builder
2 | WORKDIR /app
3 | COPY go.mod go.sum ./
4 | RUN go mod download
5 | COPY . .
6 | RUN CGO_ENABLED=0 GOOS=linux go build \
7 | -installsuffix cgo \
8 | -o insta_comments_indexer \
9 | insta/indexer/comments/insta_comments_indexer.go
10 |
11 | FROM alpine
12 | RUN apk --no-cache add ca-certificates
13 | WORKDIR /app
14 | COPY --from=builder /app/insta_comments_indexer .
15 | CMD ["./insta_comments_indexer"]
16 |
--------------------------------------------------------------------------------
/faces/proto/recognizer.proto:
--------------------------------------------------------------------------------
1 | syntax = "proto3";
2 |
3 | package proto;
4 |
5 | message RecognizeRequest { // request message of FaceRecognizer.RecognizeFaces
6 | string url = 1; // presumably the URL of the image to analyze — confirm against the server implementation
7 | }
8 |
9 | message Face { // one element of RegognizeResponse.faces
10 | int32 x = 1; // x, y, width and height presumably describe the face's bounding box — confirm units and origin
11 | int32 y = 2;
12 | int32 width = 3;
13 | int32 height = 4;
14 | repeated float encoding = 5; // presumably the numeric face encoding vector — confirm length with the producer
15 | }
16 |
17 | message RegognizeResponse { // response message of FaceRecognizer.RecognizeFaces; NOTE(review): "Regognize" looks like a misspelling of "Recognize", but renaming it would change the generated type names
18 | repeated Face faces = 1; // zero or more Face entries
19 | }
20 |
21 |
22 | service FaceRecognizer {
23 | rpc RecognizeFaces(RecognizeRequest) returns (RegognizeResponse); // unary call: one request, one response
24 | }
25 |
--------------------------------------------------------------------------------
/frontend/src/components/ProfileCard.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import PropTypes from "prop-types";
3 |
4 | const ProfileCard = props => {
5 | return (
6 |
7 |

12 |
13 | );
14 | };
15 |
16 | ProfileCard.propTypes = {
17 | pictureUrl: PropTypes.string
18 | };
19 |
20 | export default ProfileCard;
21 |
--------------------------------------------------------------------------------
/insta/inserter/postgres/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.13 as builder
2 | WORKDIR /app
3 | COPY go.mod .
4 | COPY go.sum .
5 | RUN go mod download
6 | COPY . .
7 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o instascraper_postgres_inserter insta/inserter/postgres/main/main.go
8 |
9 | FROM alpine
10 | RUN apk --no-cache add ca-certificates
11 | RUN mkdir /app
12 | COPY --from=builder /app/instascraper_postgres_inserter /app
13 | WORKDIR /app
14 | CMD ["./instascraper_postgres_inserter"]
15 |
--------------------------------------------------------------------------------
/frontend/src/components/BioCard.jsx:
--------------------------------------------------------------------------------
1 | import React, { Component } from "react";
2 | import H2 from "./H2";
3 |
4 | class BioCard extends Component {
5 | render() {
6 | return (
7 |
8 |
9 |
10 |
11 | {this.props.bio}
12 | |
13 |
14 |
15 |
16 | );
17 | }
18 | }
19 |
20 | export default BioCard;
21 |
--------------------------------------------------------------------------------
/insta/scraper/likes/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.13 as builder
2 | WORKDIR /app
3 | COPY go.mod .
4 | COPY go.sum .
5 | RUN go mod download
6 | COPY . .
7 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o insta_likes_scraper insta/scraper/likes/main/main.go
8 |
9 | FROM alpine
10 | RUN apk --no-cache add ca-certificates
11 | RUN mkdir /app
12 | COPY http_header-generator/useragents.json /app
13 | COPY --from=builder /app/insta_likes_scraper /app
14 | WORKDIR /app
15 | CMD ["./insta_likes_scraper"]
16 |
--------------------------------------------------------------------------------
/insta/scraper/posts/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.13 as builder
2 | WORKDIR /app
3 | COPY go.mod .
4 | COPY go.sum .
5 | RUN go mod download
6 | COPY . .
7 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o insta_posts_scraper insta/scraper/posts/main/main.go
8 |
9 | FROM alpine
10 | RUN apk --no-cache add ca-certificates
11 | RUN mkdir /app
12 | COPY http_header-generator/useragents.json /app
13 | COPY --from=builder /app/insta_posts_scraper /app
14 | WORKDIR /app
15 | CMD ["./insta_posts_scraper"]
16 |
--------------------------------------------------------------------------------
/insta/scraper/user/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.13 as builder
2 | WORKDIR /app
3 | COPY go.mod .
4 | COPY go.sum .
5 | RUN go mod download
6 | COPY . .
7 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o instascraper_scraper insta/scraper/user/main/main.go
8 |
9 | FROM alpine
10 | RUN apk --no-cache add ca-certificates
11 | RUN mkdir /app
12 | COPY http_header-generator/useragents.json /app
13 | COPY --from=builder /app/instascraper_scraper /app
14 | WORKDIR /app
15 | CMD ["./instascraper_scraper"]
16 |
--------------------------------------------------------------------------------
/scraper-client/scraper-client.go:
--------------------------------------------------------------------------------
1 | package client
2 |
3 | import "net/http"
4 |
5 | // ScraperClient is some implementation of an HTTP client usable for scraping.
6 | type ScraperClient interface {
7 | WithRetries(times int, f func() error) error // presumably calls f up to `times` times until it returns nil — confirm against the implementations
8 | Do(request *http.Request) (*http.Response, error) // same signature as (*http.Client).Do; presumably performs the request — confirm against the implementations
9 | }
10 |
11 | // HTTPStatusError is an error type whose message is the string stored in S.
12 | type HTTPStatusError struct {
13 | S string // message returned by Error; presumably describes an unexpected HTTP status — confirm at the call sites
14 | }
15 |
16 | // Error implements the error interface by returning e.S unchanged.
17 | func (e *HTTPStatusError) Error() string {
18 | return e.S
19 | }
20 |
--------------------------------------------------------------------------------
/frontend/src/components/IGPost.js:
--------------------------------------------------------------------------------
1 | import React, { Component, useState } from "react";
2 | import { withRouter, history } from "react-router";
3 |
4 | function IGPost({ post }) {
5 | return (
6 |
13 | );
14 | }
15 |
16 | export default IGPost;
17 |
--------------------------------------------------------------------------------
/frontend/src/components/img/chevron-left-solid.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/insta/db/kubernetes/kube-register-postgres.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "inventory-connector",
3 | "config": {
4 | "plugin.name": "wal2json",
5 | "connector.class": "io.debezium.connector.postgresql.PostgresConnector",
6 | "tasks.max": "1",
7 | "database.hostname": "my-postgres-postgresql",
8 | "database.port": "5432",
9 | "database.user": "postgres",
10 | "database.password": "12345678",
11 | "database.dbname": "instascraper",
12 | "database.server.name": "postgres"
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/insta/inserter/likes/Dockerfile:
--------------------------------------------------------------------------------
1 | # Build stage: compile the likes inserter as a cgo-disabled Linux binary.
2 | FROM golang:1.13 as builder
3 | WORKDIR /app
4 | # The module files are copied and downloaded in their own layers, before the sources.
5 | COPY go.mod .
6 | COPY go.sum .
7 | RUN go mod download
8 | COPY . .
9 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o insta_likes_inserter insta/inserter/likes/main/main.go
10 |
11 | # Runtime stage: alpine plus CA certificates, the user-agent list and the binary from the build stage.
12 | FROM alpine
13 | RUN apk --no-cache add ca-certificates
14 | RUN mkdir /app
15 | COPY http_header-generator/useragents.json /app
16 | COPY --from=builder /app/insta_likes_inserter /app
17 | WORKDIR /app
18 | CMD ["./insta_likes_inserter"]
16 |
--------------------------------------------------------------------------------
/insta/inserter/posts/Dockerfile:
--------------------------------------------------------------------------------
1 | # Build stage: compile the posts inserter as a cgo-disabled Linux binary.
2 | FROM golang:1.13 as builder
3 | WORKDIR /app
4 | # The module files are copied and downloaded in their own layers, before the sources.
5 | COPY go.mod .
6 | COPY go.sum .
7 | RUN go mod download
8 | COPY . .
9 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o insta_posts_inserter insta/inserter/posts/main/main.go
10 |
11 | # Runtime stage: alpine plus CA certificates, the user-agent list and the binary from the build stage.
12 | FROM alpine
13 | RUN apk --no-cache add ca-certificates
14 | RUN mkdir /app
15 | COPY http_header-generator/useragents.json /app
16 | COPY --from=builder /app/insta_posts_inserter /app
17 | WORKDIR /app
18 | CMD ["./insta_posts_inserter"]
16 |
--------------------------------------------------------------------------------
/neo4j/create-import-user-json/Dockerfile:
--------------------------------------------------------------------------------
1 | # Build stage: compile the neo4j create-import-user-json service as a cgo-disabled Linux binary.
2 | FROM golang:1.13 as builder
3 | WORKDIR /app
4 | # The module files are copied and downloaded in their own layers, before the sources.
5 | COPY go.mod go.sum ./
6 | RUN go mod download
7 | COPY . .
8 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o create_import_json neo4j/create-import-user-json/main/main.go
9 |
10 | # Runtime stage: alpine plus CA certificates, the user-agent list and the binary from the build stage.
11 | FROM alpine
12 | RUN apk --no-cache add ca-certificates
13 | RUN mkdir /app
14 | COPY http_header-generator/useragents.json /app
15 | COPY --from=builder /app/create_import_json /app
16 | WORKDIR /app
17 | CMD ["./create_import_json"]
15 |
--------------------------------------------------------------------------------
/insta/scraper/comments/Dockerfile:
--------------------------------------------------------------------------------
1 | # Build stage: compile the comments scraper as a cgo-disabled Linux binary.
2 | FROM golang:1.13 as builder
3 | WORKDIR /app
4 | # The module files are copied and downloaded in their own layers, before the sources.
5 | COPY go.mod .
6 | COPY go.sum .
7 | RUN go mod download
8 | COPY . .
9 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o insta_comments_scraper insta/scraper/comments/main/main.go
10 |
11 | # Runtime stage: alpine plus CA certificates, the user-agent list and the binary from the build stage.
12 | FROM alpine
13 | RUN apk --no-cache add ca-certificates
14 | RUN mkdir /app
15 | COPY http_header-generator/useragents.json /app
16 | COPY --from=builder /app/insta_comments_scraper /app
17 | WORKDIR /app
18 | CMD ["./insta_comments_scraper"]
16 |
--------------------------------------------------------------------------------
/elastic/models/insta.go:
--------------------------------------------------------------------------------
1 | package models
2 |
3 | // InstaUser is the JSON representation of an Instagram user (id, user_name, real_name, bio).
4 | type InstaUser struct {
5 | 	ID       int    `json:"id"`
6 | 	Username string `json:"user_name"`
7 | 	Realname string `json:"real_name"`
8 | 	Bio      string `json:"bio"`
9 | }
9 |
10 | // InstaPost is the JSON representation of an Instagram post: its id, the id of its user and the caption.
11 | type InstaPost struct {
12 | 	ID      int    `json:"id"`
13 | 	UserID  int    `json:"user_id"`
14 | 	Caption string `json:"caption"`
15 | }
15 |
16 | // InstaComment is the JSON representation of a comment: its id, the id of its post and the comment text.
17 | type InstaComment struct {
18 | 	ID      int    `json:"id"`
19 | 	PostID  int    `json:"post_id"`
20 | 	Comment string `json:"comment_text"` // note: serialized as "comment_text", not "comment"
21 | }
21 |
--------------------------------------------------------------------------------
/insta/inserter/comments/Dockerfile:
--------------------------------------------------------------------------------
1 | # Build stage: compile the comments inserter as a cgo-disabled Linux binary.
2 | FROM golang:1.13 as builder
3 | WORKDIR /app
4 | # The module files are copied and downloaded in their own layers, before the sources.
5 | COPY go.mod .
6 | COPY go.sum .
7 | RUN go mod download
8 | COPY . .
9 | RUN CGO_ENABLED=0 GOOS=linux go build -installsuffix cgo -o insta_comments_inserter insta/inserter/comments/main/main.go
10 |
11 | # Runtime stage: alpine plus CA certificates, the user-agent list and the binary from the build stage.
12 | FROM alpine
13 | RUN apk --no-cache add ca-certificates
14 | RUN mkdir /app
15 | COPY http_header-generator/useragents.json /app
16 | COPY --from=builder /app/insta_comments_inserter /app
17 | WORKDIR /app
18 | CMD ["./insta_comments_inserter"]
16 |
--------------------------------------------------------------------------------
/frontend/src/components/LocationCard.jsx:
--------------------------------------------------------------------------------
1 | import React, { Component } from "react";
2 | import H2 from "./H2";
3 |
4 | class LocationCard extends Component {
5 | render() {
6 | return (
7 |
8 |
9 |
10 |
11 | {this.props.bio}
12 | |
13 |
14 |
15 |
16 | );
17 | }
18 | }
19 |
20 | export default LocationCard;
21 |
--------------------------------------------------------------------------------
/elastic/build/Dockerfile:
--------------------------------------------------------------------------------
1 | # Build stage: clone the es-7.1 branch of the fast-elasticsearch-vector-scoring plugin and package it with Maven.
2 | FROM ubuntu as plugin-builder
3 | RUN apt update && apt install -y git maven openjdk-8-jdk
4 | WORKDIR /src
5 | RUN git clone -b es-7.1 --single-branch https://github.com/lior-k/fast-elasticsearch-vector-scoring.git
6 | WORKDIR /src/fast-elasticsearch-vector-scoring
7 | RUN mvn package
8 |
9 | # Final image: Elasticsearch 7.1.0 with the plugin zip from the build stage installed.
10 | FROM elasticsearch:7.1.0
11 | COPY --from=plugin-builder /src/fast-elasticsearch-vector-scoring/target/releases/elasticsearch-binary-vector-scoring-7.1.0.zip /plugins/
12 | RUN bin/elasticsearch-plugin install file:///plugins/elasticsearch-binary-vector-scoring-7.1.0.zip
11 |
--------------------------------------------------------------------------------
/frontend/src/components/EndButton.jsx:
--------------------------------------------------------------------------------
1 | import React, { Component } from "react";
2 | import { FontAwesomeIcon } from "@fortawesome/react-fontawesome";
3 | import { faTimes } from "@fortawesome/free-solid-svg-icons";
4 | import { Link } from "react-router-dom";
5 | import "./../css/endButton.css";
6 |
7 | class EndButton extends Component {
8 | render() {
9 | return (
10 |
11 |
12 |
13 |
14 |
15 | );
16 | }
17 | }
18 |
19 | export default EndButton;
20 |
--------------------------------------------------------------------------------
/insta/scraper/user/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | scraper "github.com/codeuniversity/smag-mvp/insta/scraper/user"
5 | "github.com/codeuniversity/smag-mvp/kafka"
6 | "github.com/codeuniversity/smag-mvp/service"
7 | )
8 |
9 | // main builds the Instagram user scraper from the Kafka scraper config and runs it.
10 | func main() {
11 | 	readerCfg, infoCfg, errCfg := kafka.GetScraperConfig()
12 | 	nameReader := kafka.NewReader(readerCfg)
13 | 	infoWriter := kafka.NewWriter(infoCfg)
14 | 	errWriter := kafka.NewWriter(errCfg)
15 |
16 | 	userScraper := scraper.New(nameReader, infoWriter, errWriter)
17 | 	service.CloseOnSignal(userScraper)
18 | 	// Start hands back a function; main returns only after that function does.
19 | 	wait := userScraper.Start()
20 | 	wait()
21 | }
22 |
--------------------------------------------------------------------------------
/config/postgres-config.go:
--------------------------------------------------------------------------------
1 | package config
2 |
3 | import "github.com/codeuniversity/smag-mvp/utils"
4 |
5 | // PostgresConfig holds all the configurable variables for Postgres
6 | type PostgresConfig struct {
7 | 	PostgresHost     string // filled from POSTGRES_HOST by GetPostgresConfig (default "127.0.0.1")
8 | 	PostgresPassword string // filled from POSTGRES_PASSWORD by GetPostgresConfig (default "")
9 | }
10 |
11 | // GetPostgresConfig returns a PostgresConfig initialized from POSTGRES_HOST and POSTGRES_PASSWORD (with defaults).
12 | func GetPostgresConfig() *PostgresConfig {
13 | 	cfg := new(PostgresConfig)
14 | 	cfg.PostgresHost = utils.GetStringFromEnvWithDefault("POSTGRES_HOST", "127.0.0.1")
15 | 	cfg.PostgresPassword = utils.GetStringFromEnvWithDefault("POSTGRES_PASSWORD", "")
16 | 	return cfg
17 | }
18 |
--------------------------------------------------------------------------------
/utils/neo4j-utils.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | // Neo4jConfig holds all the information necessary to connect to a neo4j database
4 | type Neo4jConfig struct {
5 | 	Host     string // filled from NEO4J_HOST by GetNeo4jConfig (default "localhost")
6 | 	Username string // filled from NEO4J_USERNAME by GetNeo4jConfig (default "neo4j")
7 | 	Password string // filled from NEO4J_PASSWORD by GetNeo4jConfig (default "")
8 | }
9 |
10 | //getNeo4jConfig returns a initialized Neo4jConfig object by reading the values from env variables
11 | func GetNeo4jConfig() *Neo4jConfig {
12 | return &Neo4jConfig{
13 | Host: GetStringFromEnvWithDefault("NEO4J_HOST", "localhost"),
14 | Username: GetStringFromEnvWithDefault("NEO4J_USERNAME", "neo4j"),
15 | Password: GetStringFromEnvWithDefault("NEO4J_PASSWORD", ""),
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/frontend/public/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "short_name": "React App",
3 | "name": "Create React App Sample",
4 | "icons": [
5 | {
6 | "src": "favicon.ico",
7 | "sizes": "64x64 32x32 24x24 16x16",
8 | "type": "image/x-icon"
9 | },
10 | {
11 | "src": "logo192.png",
12 | "type": "image/png",
13 | "sizes": "192x192"
14 | },
15 | {
16 | "src": "logo512.png",
17 | "type": "image/png",
18 | "sizes": "512x512"
19 | }
20 | ],
21 | "start_url": ".",
22 | "display": "standalone",
23 | "theme_color": "#000000",
24 | "background_color": "#ffffff"
25 | }
26 |
--------------------------------------------------------------------------------
/twitter/scraper/requirements.txt:
--------------------------------------------------------------------------------
1 | aiodns==2.0.0
2 | aiohttp==3.6.2
3 | aiohttp-socks==0.2.2
4 | async-timeout==3.0.1
5 | attrs==19.3.0
6 | beautifulsoup4==4.8.1
7 | cchardet==2.1.4
8 | cffi==1.13.0
9 | chardet==3.0.4
10 | elasticsearch==7.0.5
11 | fake-useragent==0.1.11
12 | geographiclib==1.50
13 | geopy==1.20.0
14 | idna==2.8
15 | kafka-python==1.4.7
16 | multidict==4.5.2
17 | numpy==1.17.2
18 | pandas==0.25.1
19 | pycares==3.0.0
20 | pycparser==2.19
21 | pysocks==1.7.1
22 | python-dateutil==2.8.0
23 | pytz==2019.3
24 | schedule==0.6.0
25 | six==1.12.0
26 | soupsieve==1.9.4
27 | twint==2.1.2
28 | urllib3==1.25.6
29 | yarl==1.3.0
30 |
--------------------------------------------------------------------------------
/frontend/src/components/InterestCard.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import PropTypes from "prop-types";
3 | import Slideshow from "./Slideshow";
4 | import InterestFooter from "./InterestFooter";
5 |
6 | const InterestCard = props => {
7 | return (
8 |
14 | );
15 | };
16 |
17 | InterestCard.propTypes = {
18 | slides: PropTypes.array,
19 | title: PropTypes.string,
20 | details: PropTypes.string
21 | };
22 |
23 | export default InterestCard;
24 |
--------------------------------------------------------------------------------
/tools/Dockerfile:
--------------------------------------------------------------------------------
1 | # Tools image: ubuntu with command-line clients (curl, wget, psql, kafkacat, jq, kt, git) and zsh as the entrypoint.
2 | FROM ubuntu
3 |
4 | RUN apt update && apt install curl wget file xz-utils postgresql-client zsh git kafkacat jq -y
5 |
6 | # install github.com/fgeller/kt
7 | # (download the release archive, decompress it with unxz, untar it, move the binary onto PATH, remove the leftovers)
8 | RUN wget https://github.com/fgeller/kt/releases/download/v12.1.0/kt-v12.1.0-linux-amd64.txz && \
9 |     cat kt-v12.1.0-linux-amd64.txz | unxz > kt-v12.1.0-linux-amd64 && \
10 |     tar -xvf kt-v12.1.0-linux-amd64 && \
11 |     mv kt /usr/local/bin && \
12 |     rm kt-v12.1.0-linux-amd64.txz && \
13 |     rm kt-v12.1.0-linux-amd64
14 |
15 | # install oh-my-zsh via its install script, then put the project's .zshrc in root's home
16 | RUN zsh -c "$(curl -fsSL https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh)"
17 | COPY .zshrc /root
18 |
19 | WORKDIR /home/tools
20 |
21 | ENTRYPOINT [ "zsh"]
19 |
--------------------------------------------------------------------------------
/insta/db/kubernetes/kube-start-postgres.sh:
--------------------------------------------------------------------------------
1 | migrate -database "postgres://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:5432/instascraper?sslmode=disable" -path db/migrations up
2 |
3 | jq --arg name "$POSTGRES_USER" --arg password "$POSTGRES_PASSWORD" --arg host "$POSTGRES_HOST" --arg dbname "$POSTGRES_DATABASE" '.config."database.user"=$name | .config."database.password"=$password | .config."database.hostname"=$host | .config."database.dbname"=$dbname' kube-register-postgres.json > kube-register-postgres-secret.json
4 |
5 | curl -i -X POST -H "Accept:application/json" \
6 | -H "Content-Type:application/json" \
7 | http://deb-connect-service:8083/connectors/ \
8 | -d @kube-register-postgres-secret.json
9 |
--------------------------------------------------------------------------------
/insta/inserter/likes/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | inserter "github.com/codeuniversity/smag-mvp/insta/inserter/likes"
5 | "github.com/codeuniversity/smag-mvp/kafka"
6 | "github.com/codeuniversity/smag-mvp/service"
7 | "github.com/codeuniversity/smag-mvp/utils"
8 | )
9 |
10 | // main builds the likes inserter from the POSTGRES_* env settings and the Kafka inserter config, then runs it.
11 | func main() {
12 | 	dbHost := utils.GetStringFromEnvWithDefault("POSTGRES_HOST", "127.0.0.1")
13 | 	dbPassword := utils.GetStringFromEnvWithDefault("POSTGRES_PASSWORD", "")
14 | 	reader := kafka.NewReader(kafka.GetInserterConfig())
15 |
16 | 	likesInserter := inserter.New(dbHost, dbPassword, reader)
17 | 	service.CloseOnSignal(likesInserter)
18 |
19 | 	// Start hands back a function; main returns only after that function does.
20 | 	wait := likesInserter.Start()
21 | 	wait()
22 | }
23 |
--------------------------------------------------------------------------------
/tools/.zshrc:
--------------------------------------------------------------------------------
1 |
2 | # If you come from bash you might have to change your $PATH.
3 | # export PATH=$HOME/bin:/usr/local/bin:$PATH
4 |
5 | # Path to your oh-my-zsh installation.
6 | export ZSH="/root/.oh-my-zsh"
7 |
8 | # Set name of the theme to load --- if set to "random", it will
9 | # load a random theme each time oh-my-zsh is loaded, in which case,
10 | # to know which specific one was loaded, run: echo $RANDOM_THEME
11 | # See https://github.com/robbyrussell/oh-my-zsh/wiki/Themes
12 | ZSH_THEME="miloshadzic"
13 |
14 |
15 | DISABLE_AUTO_UPDATE="true"
16 |
17 | plugins=(git)
18 |
19 | source $ZSH/oh-my-zsh.sh
20 |
21 | export RPROMPT="%{$fg_bold[blue]%} codesmag/tools %{$fg_bold[blue]%} [%D{%y/%m/%f}|%@] %{$reset_color%}%%"
22 |
--------------------------------------------------------------------------------
/insta/inserter/comments/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | inserter "github.com/codeuniversity/smag-mvp/insta/inserter/comments"
5 | "github.com/codeuniversity/smag-mvp/kafka"
6 | "github.com/codeuniversity/smag-mvp/service"
7 | "github.com/codeuniversity/smag-mvp/utils"
8 | )
9 |
10 | // main builds the comments inserter from the POSTGRES_* env settings and the Kafka inserter config, then runs it.
11 | func main() {
12 | 	dbHost := utils.GetStringFromEnvWithDefault("POSTGRES_HOST", "127.0.0.1")
13 | 	dbPassword := utils.GetStringFromEnvWithDefault("POSTGRES_PASSWORD", "")
14 | 	reader := kafka.NewReader(kafka.GetInserterConfig())
15 |
16 | 	commentsInserter := inserter.New(dbHost, dbPassword, reader)
17 | 	service.CloseOnSignal(commentsInserter)
18 |
19 | 	// Start hands back a function; main returns only after that function does.
20 | 	wait := commentsInserter.Start()
21 | 	wait()
22 | }
23 |
--------------------------------------------------------------------------------
/scraper-client/scraper-config.go:
--------------------------------------------------------------------------------
1 | package client
2 |
3 | import "github.com/codeuniversity/smag-mvp/utils"
4 |
5 | // ScraperConfig holds the timeout and retry settings that GetScraperConfig reads from the environment.
6 | type ScraperConfig struct {
7 | 	ElasticAssignmentTimeout int // ELASTIC_ASSIGNMENT_TIMEOUT, default 10000 (unit is not stated in this file)
8 | 	RequestTimeout           int // REQUEST_TIMEOUT, default 1000 (unit is not stated in this file)
9 | 	RequestRetryCount        int // REQUEST_RETRY_COUNT, default 3
10 | 	ElasticIpRetryCount      int // ELASTIC_IP_RETRY_COUNT, default 2
11 | }
11 |
12 | // GetScraperConfig returns a ScraperConfig whose fields are read from env variables, with numeric defaults.
13 | func GetScraperConfig() *ScraperConfig {
14 | 	cfg := new(ScraperConfig)
15 | 	cfg.ElasticAssignmentTimeout = utils.GetNumberFromEnvWithDefault("ELASTIC_ASSIGNMENT_TIMEOUT", 10000)
16 | 	cfg.RequestTimeout = utils.GetNumberFromEnvWithDefault("REQUEST_TIMEOUT", 1000)
17 | 	cfg.RequestRetryCount = utils.GetNumberFromEnvWithDefault("REQUEST_RETRY_COUNT", 3)
18 | 	cfg.ElasticIpRetryCount = utils.GetNumberFromEnvWithDefault("ELASTIC_IP_RETRY_COUNT", 2)
19 | 	return cfg
20 | }
20 |
--------------------------------------------------------------------------------
/neo4j/create-import-user-json/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | neo4j_import "github.com/codeuniversity/smag-mvp/neo4j/create-import-user-json"
5 | "github.com/codeuniversity/smag-mvp/service"
6 | "github.com/codeuniversity/smag-mvp/utils"
7 | )
8 |
9 | // main configures the neo4j import service from the KAFKA_* env variables and runs it.
10 | func main() {
11 | 	address := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "127.0.0.1:9092")
12 | 	group := utils.MustGetStringFromEnv("KAFKA_GROUPID")
13 | 	changeTopic := utils.MustGetStringFromEnv("KAFKA_CHANGE_TOPIC")
14 | 	chunkSize := utils.GetNumberFromEnvWithDefault("KAFKA_MESSAGE_CHUNK", 10)
15 |
16 | 	importer := neo4j_import.New(address, changeTopic, group, chunkSize)
17 | 	service.CloseOnSignal(importer)
18 |
19 | 	// Start hands back a function; main returns only after that function does.
20 | 	wait := importer.Start()
21 | 	wait()
22 | }
23 |
--------------------------------------------------------------------------------
/insta/inserter/postgres/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | inserter "github.com/codeuniversity/smag-mvp/insta/inserter/postgres"
5 | "github.com/codeuniversity/smag-mvp/kafka"
6 | "github.com/codeuniversity/smag-mvp/service"
7 | "github.com/codeuniversity/smag-mvp/utils"
8 | )
9 |
10 | func main() {
11 | var i *inserter.Inserter
12 |
13 | postgresHost := utils.GetStringFromEnvWithDefault("POSTGRES_HOST", "127.0.0.1")
14 | postgresPassword := utils.GetStringFromEnvWithDefault("POSTGRES_PASSWORD", "")
15 |
16 | qReaderConfig := kafka.GetInserterConfig()
17 |
18 | i = inserter.New(
19 | postgresHost,
20 | postgresPassword,
21 | kafka.NewReader(qReaderConfig),
22 | )
23 |
24 | service.CloseOnSignal(i)
25 | waitUntilClosed := i.Start()
26 |
27 | waitUntilClosed()
28 | }
29 |
--------------------------------------------------------------------------------
/frontend/src/pages/Greeting.jsx:
--------------------------------------------------------------------------------
1 | import React, { Component } from "react";
2 | import Button from "../components/Button";
3 | import "./../index.css";
4 | import H1 from "../components/H1";
5 | import H2 from "../components/H2";
6 |
7 | class Greeting extends Component {
8 | render() {
9 | return (
10 |
11 |
12 |
13 |
Welcome to SocialRecord
14 |
Sit back and enjoy the experience.
15 |
16 |
17 |
18 |
19 |
20 |
21 | );
22 | }
23 | }
24 |
25 | export default Greeting;
26 |
--------------------------------------------------------------------------------
/frontend/src/pages/GroupIntent.jsx:
--------------------------------------------------------------------------------
1 | import React, { Component } from "react";
2 | import Button from "../components/Button";
3 | import "./../index.css";
4 | import H1 from "../components/H1";
5 | import H2 from "../components/H2";
6 |
7 | function GroupIntent({ nextPage }) {
8 | return (
9 |
10 |
11 |
12 |
Anyone can do this
13 |
We had limited time and money.
14 |
Imagine what others could do with this power.
15 |
16 |
17 |
18 |
19 |
20 |
21 | );
22 | }
23 |
24 | export default GroupIntent;
25 |
--------------------------------------------------------------------------------
/frontend/src/components/Slideshow.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import PropTypes from "prop-types";
3 | import { Zoom } from "react-slideshow-image";
4 |
5 | const properties = {
6 | duration: 5000,
7 | transitionDuration: 300,
8 | indicators: false,
9 | scale: 1.4,
10 | arrows: false
11 | };
12 |
13 | const Slideshow = props => {
14 | return (
15 |
16 |
17 | {props.slides.map((imageUrl, index) => (
18 |
24 | ))}
25 |
26 |
27 | );
28 | };
29 |
30 | Slideshow.propTypes = {
31 | slides: PropTypes.array
32 | };
33 |
34 | export default Slideshow;
35 |
--------------------------------------------------------------------------------
/insta/scraper/posts/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | scraper "github.com/codeuniversity/smag-mvp/insta/scraper/posts"
5 | "github.com/codeuniversity/smag-mvp/kafka"
6 | client "github.com/codeuniversity/smag-mvp/scraper-client"
7 | "github.com/codeuniversity/smag-mvp/service"
8 | "github.com/codeuniversity/smag-mvp/utils"
9 | )
10 |
11 | // main wires the Instagram posts scraper: scraper client config, AWS service address and its Kafka reader/writers.
12 | func main() {
13 | 	awsAddr := utils.GetStringFromEnvWithDefault("AWS_SERVICE_ADDRESS", "")
14 | 	readerCfg, infoCfg, errCfg := kafka.GetInstaPostsScraperConfig()
15 | 	scraperCfg := client.GetScraperConfig()
16 |
17 | 	postsScraper := scraper.New(
18 | 		scraperCfg,
19 | 		awsAddr,
20 | 		kafka.NewReader(readerCfg),
21 | 		kafka.NewWriter(infoCfg),
22 | 		kafka.NewWriter(errCfg),
23 | 	)
24 | 	service.CloseOnSignal(postsScraper)
25 |
26 | 	// Start hands back a function; main returns only after that function does.
27 | 	wait := postsScraper.Start()
28 | 	wait()
29 | }
23 |
--------------------------------------------------------------------------------
/frontend/src/components/Form.js:
--------------------------------------------------------------------------------
1 | import React, { Component, useState } from "react";
2 | import { withRouter, history } from "react-router";
3 |
4 | // import {onSubmit} from './App';
5 |
6 | // eslint-disable-next-line
7 | function Form(props) {
8 | const [value, setValue] = useState("");
9 |
10 | return (
11 |
12 |
30 |
31 | );
32 | }
33 |
34 | export default Form;
35 |
--------------------------------------------------------------------------------
/twitter/inserter/posts/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "github.com/codeuniversity/smag-mvp/kafka"
5 | "github.com/codeuniversity/smag-mvp/service"
6 | inserter "github.com/codeuniversity/smag-mvp/twitter/inserter/posts"
7 | "github.com/codeuniversity/smag-mvp/utils"
8 | )
9 |
10 | func main() {
11 | var i *inserter.Inserter
12 |
13 | postgresHost := utils.GetStringFromEnvWithDefault("POSTGRES_HOST", "127.0.0.1")
14 | postgresPassword := utils.GetStringFromEnvWithDefault("POSTGRES_PASSWORD", "")
15 | postgresDBNAme := utils.GetStringFromEnvWithDefault("POSTGRES_DB_NAME", "twitter_scraper")
16 |
17 | qReaderConfig := kafka.GetInserterConfig()
18 |
19 | i = inserter.New(
20 | postgresHost,
21 | postgresPassword,
22 | postgresDBNAme,
23 | kafka.NewReader(qReaderConfig),
24 | )
25 |
26 | service.CloseOnSignal(i)
27 | waitUntilDone := i.Start()
28 | waitUntilDone()
29 | }
30 |
--------------------------------------------------------------------------------
/twitter/inserter/users/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "github.com/codeuniversity/smag-mvp/kafka"
5 | "github.com/codeuniversity/smag-mvp/service"
6 | inserter "github.com/codeuniversity/smag-mvp/twitter/inserter/users"
7 | "github.com/codeuniversity/smag-mvp/utils"
8 | )
9 |
10 | func main() {
11 | var i *inserter.Inserter
12 |
13 | postgresHost := utils.GetStringFromEnvWithDefault("POSTGRES_HOST", "127.0.0.1")
14 | postgresPassword := utils.GetStringFromEnvWithDefault("POSTGRES_PASSWORD", "")
15 | postgresDBNAme := utils.GetStringFromEnvWithDefault("POSTGRES_DB_NAME", "twitter_scraper")
16 |
17 | qReaderConfig := kafka.GetInserterConfig()
18 |
19 | i = inserter.New(
20 | postgresHost,
21 | postgresPassword,
22 | postgresDBNAme,
23 | kafka.NewReader(qReaderConfig),
24 | )
25 |
26 | service.CloseOnSignal(i)
27 | waitUntilDone := i.Start()
28 | waitUntilDone()
29 | }
30 |
--------------------------------------------------------------------------------
/frontend/src/components/Popup.jsx:
--------------------------------------------------------------------------------
1 | import React, { Component, useState } from "react";
2 | import { withRouter, history } from "react-router";
3 | import Button from "../components/Button";
4 | import H1 from "../components/H1";
5 |
6 | function Popup() {
7 | return (
8 |
9 |
10 |
Is this your profile?
11 |
![]()
12 |
username
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | );
24 | }
25 |
26 | export default Popup;
27 |
--------------------------------------------------------------------------------
/insta/scraper/likes/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | scraper "github.com/codeuniversity/smag-mvp/insta/scraper/likes"
5 | "github.com/codeuniversity/smag-mvp/kafka"
6 | client "github.com/codeuniversity/smag-mvp/scraper-client"
7 | "github.com/codeuniversity/smag-mvp/service"
8 | "github.com/codeuniversity/smag-mvp/utils"
9 | )
10 |
11 | // main wires the Instagram likes scraper: scraper client config, AWS service address, Kafka reader/writers and LIKE_LIMIT.
12 | func main() {
13 | 	awsAddr := utils.GetStringFromEnvWithDefault("AWS_SERVICE_ADDRESS", "")
14 | 	likeLimit := utils.GetNumberFromEnvWithDefault("LIKE_LIMIT", 24)
15 | 	readerCfg, infoCfg, errCfg := kafka.GetScraperConfig()
16 | 	scraperCfg := client.GetScraperConfig()
17 |
18 | 	likesScraper := scraper.New(
19 | 		scraperCfg,
20 | 		awsAddr,
21 | 		kafka.NewReader(readerCfg),
22 | 		kafka.NewWriter(infoCfg),
23 | 		kafka.NewWriter(errCfg),
24 | 		likeLimit,
25 | 	)
26 | 	service.CloseOnSignal(likesScraper)
27 |
28 | 	// Start hands back a function; main returns only after that function does.
29 | 	wait := likesScraper.Start()
30 | 	wait()
31 | }
24 |
--------------------------------------------------------------------------------
/frontend/public/favicon/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "App",
3 | "icons": [
4 | {
5 | "src": "\/android-icon-36x36.png",
6 | "sizes": "36x36",
7 | "type": "image\/png",
8 | "density": "0.75"
9 | },
10 | {
11 | "src": "\/android-icon-48x48.png",
12 | "sizes": "48x48",
13 | "type": "image\/png",
14 | "density": "1.0"
15 | },
16 | {
17 | "src": "\/android-icon-72x72.png",
18 | "sizes": "72x72",
19 | "type": "image\/png",
20 | "density": "1.5"
21 | },
22 | {
23 | "src": "\/android-icon-96x96.png",
24 | "sizes": "96x96",
25 | "type": "image\/png",
26 | "density": "2.0"
27 | },
28 | {
29 | "src": "\/android-icon-144x144.png",
30 | "sizes": "144x144",
31 | "type": "image\/png",
32 | "density": "3.0"
33 | },
34 | {
35 | "src": "\/android-icon-192x192.png",
36 | "sizes": "192x192",
37 | "type": "image\/png",
38 | "density": "4.0"
39 | }
40 | ]
41 | }
--------------------------------------------------------------------------------
/insta/scraper/comments/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | scraper "github.com/codeuniversity/smag-mvp/insta/scraper/comments"
5 | "github.com/codeuniversity/smag-mvp/kafka"
6 | client "github.com/codeuniversity/smag-mvp/scraper-client"
7 | "github.com/codeuniversity/smag-mvp/service"
8 | "github.com/codeuniversity/smag-mvp/utils"
9 | )
10 |
11 | // main wires the Instagram comments scraper: scraper client config, AWS service address, Kafka reader/writers and COMMENT_LIMIT.
12 | func main() {
13 | 	awsAddr := utils.GetStringFromEnvWithDefault("AWS_SERVICE_ADDRESS", "")
14 | 	limit := utils.GetNumberFromEnvWithDefault("COMMENT_LIMIT", 24)
15 | 	readerCfg, infoCfg, errCfg := kafka.GetScraperConfig()
16 | 	scraperCfg := client.GetScraperConfig()
17 |
18 | 	commentsScraper := scraper.New(
19 | 		scraperCfg,
20 | 		awsAddr,
21 | 		kafka.NewReader(readerCfg),
22 | 		kafka.NewWriter(infoCfg),
23 | 		kafka.NewWriter(errCfg),
24 | 		limit,
25 | 	)
26 | 	service.CloseOnSignal(commentsScraper)
27 |
28 | 	// Start hands back a function; main returns only after that function does.
29 | 	wait := commentsScraper.Start()
30 | 	wait()
31 | }
24 |
--------------------------------------------------------------------------------
/insta/pics-downloader/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "github.com/codeuniversity/smag-mvp/config"
5 | downloader "github.com/codeuniversity/smag-mvp/insta/pics-downloader"
6 | "github.com/codeuniversity/smag-mvp/kafka"
7 | "github.com/codeuniversity/smag-mvp/service"
8 | "github.com/codeuniversity/smag-mvp/utils"
9 | )
10 |
11 | // main wires the picture downloader: a Kafka reader for the download-jobs topic plus the S3 and Postgres configs.
12 | func main() {
13 | 	address := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "127.0.0.1:9092")
14 | 	group := utils.MustGetStringFromEnv("KAFKA_GROUPID")
15 | 	topic := utils.MustGetStringFromEnv("KAFKA_PICTURE_DOWNLOADS_TOPIC")
16 | 	jobsReader := kafka.NewReader(kafka.NewReaderConfig(address, group, topic))
17 |
18 | 	s3Cfg := config.GetS3Config()
19 | 	pgCfg := config.GetPostgresConfig()
20 | 	picsDownloader := downloader.New(jobsReader, s3Cfg, pgCfg)
21 | 	service.CloseOnSignal(picsDownloader)
22 |
23 | 	// Start hands back a function; main returns only after that function does.
24 | 	wait := picsDownloader.Start()
25 | 	wait()
26 | }
27 |
--------------------------------------------------------------------------------
/insta/inserter/posts/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | inserter "github.com/codeuniversity/smag-mvp/insta/inserter/posts"
5 | "github.com/codeuniversity/smag-mvp/kafka"
6 | "github.com/codeuniversity/smag-mvp/service"
7 | "github.com/codeuniversity/smag-mvp/utils"
8 | )
9 |
10 | // main builds the Instagram posts inserter from the POSTGRES_* and KAFKA_* env settings and runs it.
11 | func main() {
12 | 	dbHost := utils.GetStringFromEnvWithDefault("POSTGRES_HOST", "127.0.0.1")
13 | 	dbPassword := utils.GetStringFromEnvWithDefault("POSTGRES_PASSWORD", "")
14 |
15 | 	address := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "127.0.0.1:9092")
16 | 	group := utils.MustGetStringFromEnv("KAFKA_GROUPID")
17 | 	postsTopic := utils.MustGetStringFromEnv("KAFKA_INSTA_POSTS_TOPIC")
18 | 	reader := kafka.NewReader(kafka.NewReaderConfig(address, group, postsTopic))
19 |
20 | 	postsInserter := inserter.New(dbHost, dbPassword, reader)
21 | 	service.CloseOnSignal(postsInserter)
22 |
23 | 	// Start hands back a function; main returns only after that function does.
24 | 	wait := postsInserter.Start()
25 | 	wait()
26 | }
27 |
--------------------------------------------------------------------------------
/config/s3-config.go:
--------------------------------------------------------------------------------
1 | package config
2 |
3 | import "github.com/codeuniversity/smag-mvp/utils"
4 |
5 | // S3Config holds all the configurable variables for S3
6 | type S3Config struct {
7 | 	S3BucketName      string // S3_BUCKET_NAME, default "insta_pics"
8 | 	S3Region          string // S3_REGION, default "eu-west-1"
9 | 	S3Endpoint        string // endpoint address, default "127.0.0.1:9000"
10 | 	S3AccessKeyID     string // S3_ACCESS_KEY_ID, read with MustGetStringFromEnv (no default)
11 | 	S3SecretAccessKey string // S3_SECRET_ACCESS_KEY, read with MustGetStringFromEnv (no default)
12 | 	S3UseSSL          bool   // S3_USE_SSL, default true
13 | }
14 |
15 | // GetS3Config returns an initialized S3Config read from env variables.
16 | //
17 | // Bucket, region, endpoint and the SSL flag fall back to defaults; the access
18 | // key id and secret are read with utils.MustGetStringFromEnv and have no default.
19 | func GetS3Config() *S3Config {
20 | 	// The endpoint variable used to be read under the misspelled name S3_ENDOINT.
21 | 	// The correctly spelled S3_ENDPOINT now takes precedence, and the old name is
22 | 	// still honoured so existing deployments keep working.
23 | 	legacyEndpoint := utils.GetStringFromEnvWithDefault("S3_ENDOINT", "127.0.0.1:9000")
24 |
25 | 	return &S3Config{
26 | 		S3BucketName:      utils.GetStringFromEnvWithDefault("S3_BUCKET_NAME", "insta_pics"),
27 | 		S3Region:          utils.GetStringFromEnvWithDefault("S3_REGION", "eu-west-1"),
28 | 		S3Endpoint:        utils.GetStringFromEnvWithDefault("S3_ENDPOINT", legacyEndpoint),
29 | 		S3AccessKeyID:     utils.MustGetStringFromEnv("S3_ACCESS_KEY_ID"),
30 | 		S3SecretAccessKey: utils.MustGetStringFromEnv("S3_SECRET_ACCESS_KEY"),
31 | 		S3UseSSL:          utils.GetBoolFromEnvWithDefault("S3_USE_SSL", true),
32 | 	}
33 | }
26 |
--------------------------------------------------------------------------------
/db/utils.go:
--------------------------------------------------------------------------------
1 | package db
2 |
3 | import (
4 | "github.com/jinzhu/gorm"
5 | // necessary for gorm :pointup:
6 | _ "github.com/jinzhu/gorm/dialects/postgres"
7 | )
8 |
9 | // CreateOrUpdate checks whether a specific (gorm) database entry already exists using a model filter,
10 | // creates it in case no record is found and updates the first in case of existing record(s).
11 | //
12 | // The lookup and the write run in one transaction. In both branches the result is scanned into out.
13 | // Any error (starting the transaction, the lookup, the write or the commit) is returned;
14 | // the transaction is rolled back when the lookup or the write fails.
15 | func CreateOrUpdate(db *gorm.DB, out interface{}, where interface{}, update interface{}) error {
16 | 	tx := db.Begin()
17 | 	if tx.Error != nil {
18 | 		return tx.Error
19 | 	}
20 |
21 | 	var err error
22 | 	lookup := tx.Where(where).First(out)
23 | 	switch {
24 | 	case lookup.RecordNotFound():
25 | 		err = tx.Create(update).Scan(out).Error
26 | 	case lookup.Error != nil:
27 | 		// A failed lookup is not the same as "record exists": report it instead of attempting an update.
28 | 		err = lookup.Error
29 | 	default:
30 | 		err = tx.Model(out).Update(update).Scan(out).Error
31 | 	}
32 | 	if err != nil {
33 | 		tx.Rollback()
34 | 		return err
35 | 	}
36 |
37 | 	// A failing commit means nothing was persisted, so it must reach the caller.
38 | 	return tx.Commit().Error
39 | }
28 |
29 | // Create inserts update as a new database entry and scans the result into out.
30 | func Create(db *gorm.DB, out interface{}, update interface{}) error {
31 | 	result := db.Create(update).Scan(out)
32 | 	return result.Error
33 | }
33 |
--------------------------------------------------------------------------------
/elastic/search/facetest/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "os"
6 |
7 | "github.com/codeuniversity/smag-mvp/elastic"
8 | "github.com/codeuniversity/smag-mvp/elastic/search/faces"
9 | "github.com/codeuniversity/smag-mvp/faces/proto"
10 | "google.golang.org/grpc"
11 | )
12 |
13 | func main() {
14 |
15 | if len(os.Args) != 2 {
16 | panic("requires exactly one param - the url to an image")
17 | }
18 |
19 | con, err := grpc.Dial("localhost:6666", grpc.WithInsecure())
20 | if err != nil {
21 | panic(err)
22 | }
23 | faceReconClient := proto.NewFaceRecognizerClient(con)
24 | esClient := elastic.InitializeElasticSearch([]string{"http://localhost:9200"})
25 |
26 | client := &faces.Client{
27 | FaceRecognitionClient: faceReconClient,
28 | ESClient: esClient,
29 | }
30 |
31 | faces, err := client.FindSimilarFacesInImage(os.Args[1], 10)
32 |
33 | fmt.Printf("\n (")
34 | for _, face := range faces {
35 | fmt.Printf("%d,", face.PostID)
36 | }
37 | fmt.Printf(")\n")
38 | }
39 |
--------------------------------------------------------------------------------
/twitter/scraper/twitterscraper/utils.py:
--------------------------------------------------------------------------------
1 | import twint
2 |
3 |
def get_conf(user_name: str) -> twint.config.Config:
    """Build a twint configuration for the given user.

    The returned config has ``Username`` set to ``user_name`` and both
    ``Store_object`` and ``Hide_output`` switched on.
    """
    config = twint.Config()
    config.Username = user_name
    config.Store_object = True
    config.Hide_output = True
    return config
10 |
11 |
class ShallowTwitterUser(object):
    """Twitter user that carries nothing but its username.

    For comparison, a full user object contains::

        id: str = ""
        url: str = ""
        type: str = ""
        name: str = ""
        username: str = ""
        bio: str = ""
        avatar: str = ""
        background_image: str = ""
        location: str = ""
        join_date: str = ""
        join_time: str = ""
        is_private: int = 0
        is_verified: int = 0
        following: int = 0
        following_list: List[str] = [""]
        followers: int = 0
        followers_list: List[str] = [""]
        tweets: int = 0
        likes: int = 0
        media_count: int = 0
    """

    def __init__(self, username):
        # the username is the only attribute a shallow user has
        self.username = username
39 |
--------------------------------------------------------------------------------
/insta/inserter/posts_face/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | inserter "github.com/codeuniversity/smag-mvp/insta/inserter/posts_face"
5 | "github.com/codeuniversity/smag-mvp/kafka"
6 | "github.com/codeuniversity/smag-mvp/service"
7 | "github.com/codeuniversity/smag-mvp/utils"
8 | )
9 |
10 | func main() {
11 | var i *inserter.Inserter
12 |
13 | postgresHost := utils.GetStringFromEnvWithDefault("POSTGRES_HOST", "127.0.0.1")
14 | postgresPassword := utils.GetStringFromEnvWithDefault("POSTGRES_PASSWORD", "")
15 |
16 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "127.0.0.1:9092")
17 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID")
18 | jobsReadTopic := utils.GetStringFromEnvWithDefault("KAFKA_FACE_DETECTION_RESULTS_TOPIC", "insta_posts_detected_faces")
19 | qReader := kafka.NewReader(kafka.NewReaderConfig(kafkaAddress, groupID, jobsReadTopic))
20 |
21 | i = inserter.New(
22 | postgresHost,
23 | postgresPassword,
24 | qReader,
25 | )
26 |
27 | service.CloseOnSignal(i)
28 | waitUntilClosed := i.Start()
29 |
30 | waitUntilClosed()
31 | }
32 |
--------------------------------------------------------------------------------
/faces/Dockerfile:
--------------------------------------------------------------------------------
FROM python:3.6-slim-stretch

# Refresh the package index and install the build dependencies in ONE layer: with a separate
# "apt-get update" layer Docker may reuse a cached, outdated index for a changed install
# line. The downloaded index files are removed afterwards to keep the layer small.
RUN apt-get -y update && \
    apt-get install -y --fix-missing \
    build-essential \
    cmake \
    gfortran \
    git \
    wget \
    curl \
    graphicsmagick \
    libgraphicsmagick1-dev \
    libatlas-dev \
    libavcodec-dev \
    libavformat-dev \
    libgtk2.0-dev \
    libjpeg-dev \
    liblapack-dev \
    libswscale-dev \
    pkg-config \
    python3-dev \
    python3-numpy \
    software-properties-common \
    zip \
    && apt-get clean && rm -rf /tmp/* /var/tmp/* /var/lib/apt/lists/*

# Build and install dlib v19.9 from source with AVX instructions enabled.
RUN cd ~ && \
    mkdir -p dlib && \
    git clone -b 'v19.9' --single-branch https://github.com/davisking/dlib.git dlib/ && \
    cd dlib/ && \
    python3 setup.py install --yes USE_AVX_INSTRUCTIONS

WORKDIR /src

# Install the Python dependencies before copying the sources so that this layer is only
# rebuilt when requirements.txt changes.
COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

COPY recognizer_pb2_grpc.py .
COPY recognizer_pb2.py .
COPY recognizer.py .
COPY metrics.py .
COPY server.py .

CMD [ "python", "server.py" ]
46 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# None of the targets below produce a file named after the target. Declaring them phony
# makes sure they always run, even if a file or directory with the same name shows up.
.PHONY: gen-server gen-client gen-faces run-instagram run-twitter

# API
# Generate the Go gRPC code for the user search API.
gen-server:
	protoc --go_out=plugins=grpc:. api/proto/usersearch.proto

# Generate the JavaScript / grpc-web client code into the frontend.
gen-client:
	protoc -I=api/proto/ usersearch.proto \
	--js_out=import_style=commonjs:frontend/src/protofiles/ \
	--grpc-web_out=import_style=commonjs,mode=grpcwebtext:frontend/src/protofiles/

# Generate the Go gRPC code for the face recognizer.
gen-faces:
	protoc --go_out=plugins=grpc:. faces/proto/recognizer.proto

# INSTAGRAM

INSTAGRAM_COMPOSE_FILE:=insta-compose.yml

# Start the infrastructure services first, wait 5 seconds, run the postgres migration,
# then start everything else and follow the logs.
run-instagram:
	docker-compose -f $(INSTAGRAM_COMPOSE_FILE) up -d --build es-with-plugin zookeeper my-kafka postgres connect minio neo4j
	sleep 5
	docker-compose -f $(INSTAGRAM_COMPOSE_FILE) up --build migrate-postgres
	docker-compose -f $(INSTAGRAM_COMPOSE_FILE) up -d --build
	docker-compose -f $(INSTAGRAM_COMPOSE_FILE) logs -f


# TWITTER

TWITTER_COMPOSE_FILE:=twitter-compose.yml

# Start kafka, postgres and connect first, wait 5 seconds, then start everything else
# and follow the logs.
run-twitter:
	docker-compose -f $(TWITTER_COMPOSE_FILE) up -d my-kafka postgres connect
	sleep 5
	docker-compose -f $(TWITTER_COMPOSE_FILE) up -d --build
	docker-compose -f $(TWITTER_COMPOSE_FILE) logs -f
34 |
--------------------------------------------------------------------------------
/frontend/src/pages/endscreen.jsx:
--------------------------------------------------------------------------------
1 | import React, { Component } from "react";
2 | import { withRouter } from "react-router";
3 | import Button from "../components/Button";
4 | import "./../index.css";
5 | import H1 from "../components/H1";
6 | import H2 from "../components/H2";
7 |
// EndScreen is the closing page: it asks the visitor to reconsider how much of their data
// is publicly visible and lists three privacy tips.
// NOTE(review): the JSX markup inside render() appears to be missing from this copy of the
// file — only the text content is left, and the imported Button, H1, H2 and withRouter are
// not referenced anywhere in the visible code. Restore the markup before changing this class.
class EndScreen extends Component {
  render() {
    return (




      Think again about whether all this data about you should be
      visible to everyone!

      If not, we want to give you 3 important tips.


      1. Think twice about what information you want to make public.

      2. Check your private settings again.
      3. Switch your profile to private.





    );
  }
}
33 |
34 | export default EndScreen;
35 |
--------------------------------------------------------------------------------
/faces/recognitiontest/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "github.com/codeuniversity/smag-mvp/faces/proto"
7 | "github.com/codeuniversity/smag-mvp/imgproxy"
8 | "google.golang.org/grpc"
9 | )
10 |
11 | func main() {
12 | con, err := grpc.Dial("localhost:6666", grpc.WithInsecure())
13 | if err != nil {
14 | panic(err)
15 | }
16 | c := proto.NewFaceRecognizerClient(con)
17 | response, err := c.RecognizeFaces(context.Background(), &proto.RecognizeRequest{
18 | Url: "https://cdn.madaracosmetics.com/media/catalog/category/FACE_OK_3.jpg",
19 | })
20 | if err != nil {
21 | panic(err)
22 | }
23 | p, err := imgproxy.New("localhost:8080", "5800c215e5cd5110365c390e83752526fa40758efa4dcc406e3a4fdd6e22877c", "520f986b998545b4785e0defbc4f3c1203f22de2374a3d53cb7a7fe9fea309c5")
24 | if err != nil {
25 | panic(err)
26 | }
27 | faces := response.Faces
28 | for _, face := range faces {
29 | fmt.Println(face)
30 | x := int(face.X)
31 | y := int(face.Y)
32 | width := int(face.Width)
33 | height := int(face.Height)
34 | url := p.GetCropURL(x, y, width, height, "https://cdn.madaracosmetics.com/media/catalog/category/FACE_OK_3.jpg")
35 | fmt.Println(url)
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/twitter/scraper/twitterscraper/insert_seed.py:
--------------------------------------------------------------------------------
1 | import json
2 | import logging
3 | import os
4 | from time import sleep
5 |
6 | from kafka import KafkaProducer
7 |
8 |
def main():
    """Send the seed user name to the kafka insert topic.

    Configuration comes from the environment: KAFKA_HOST_PORT, SEED_NAME,
    KAFKA_INSERT_TOPIC and SLEEP_SECONDS. The function sleeps SLEEP_SECONDS
    both before and after creating the producer, then sends the seed name
    (JSON encoded) and flushes the producer.
    """
    bootstrap_servers = os.getenv("KAFKA_HOST_PORT", "localhost:9092")
    seed_user = os.getenv("SEED_NAME", "wpbdry")
    topic = os.getenv("KAFKA_INSERT_TOPIC", "user_names")
    delay_seconds = int(os.getenv("SLEEP_SECONDS", "0"))

    def pause():
        # announce the delay, then wait
        logging.info(f"sleep for {delay_seconds} seconds")
        sleep(delay_seconds)

    pause()

    producer = KafkaProducer(
        bootstrap_servers=bootstrap_servers,
        value_serializer=lambda value: json.dumps(value).encode('utf-8'),
        reconnect_backoff_ms=500,
        reconnect_backoff_max_ms=5000,
    )

    pause()

    logging.info(f"Send user_name {seed_user} to kafka/{topic}")
    producer.send(topic, seed_user)
    producer.flush()
31 |
32 |
if __name__ == "__main__":
    # Script entry point: log at INFO level with time (including milliseconds), module,
    # level and message, then run main().
    log_format = "%(asctime)s.%(msecs)03d - %(module)s - %(levelname)s - %(message)s"
    logging.basicConfig(format=log_format, datefmt="%H:%M:%S", level=logging.INFO)
    main()
40 |
--------------------------------------------------------------------------------
/api/grpcserver/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | server "github.com/codeuniversity/smag-mvp/api/grpcserver"
5 | "github.com/codeuniversity/smag-mvp/config"
6 | "github.com/codeuniversity/smag-mvp/kafka"
7 | "github.com/codeuniversity/smag-mvp/utils"
8 | kgo "github.com/segmentio/kafka-go"
9 | )
10 |
11 | func main() {
12 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "")
13 | namesTopic := utils.GetStringFromEnvWithDefault("KAFKA_NAME_TOPIC", "")
14 | grpcPort := utils.GetStringFromEnvWithDefault("GRPC_PORT", "10000")
15 | uploadBucket := utils.MustGetStringFromEnv("S3_UPLOAD_BUCKET_NAME")
16 | esHosts := utils.GetMultipleStringsFromEnvWithDefault("ES_HOSTS", []string{"http://localhost:9200"})
17 | recognitionServiceAddress := utils.MustGetStringFromEnv("RECOGNITION_SERVICE_ADDRESS")
18 | s3Config := config.GetS3Config()
19 | postgresConfig := config.GetPostgresConfig()
20 |
21 | var writer *kgo.Writer
22 | if kafkaAddress != "" && namesTopic != "" {
23 | writer = kafka.NewWriter(kafka.NewWriterConfig(kafkaAddress, namesTopic, false))
24 | }
25 |
26 | s := server.NewGrpcServer(grpcPort, writer, s3Config, uploadBucket, postgresConfig, esHosts, recognitionServiceAddress)
27 |
28 | s.Listen()
29 | }
30 |
--------------------------------------------------------------------------------
/insta/inserter/neo4j/posts/Dockerfile:
--------------------------------------------------------------------------------
# Build stage: builds and installs the seabolt connector, then compiles the posts inserter.
FROM golang:1.13-alpine as builder
RUN apk add --no-cache ca-certificates cmake make g++ openssl-dev openssl-libs-static git curl pkgconfig
# clone the seabolt source code at tag v1.7.4
RUN git clone -b v1.7.4 https://github.com/neo4j-drivers/seabolt.git /seabolt
# invoke cmake build and install artifacts - default location is /usr/local
WORKDIR /seabolt/build
# CMAKE_INSTALL_LIBDIR=lib is a hack where we override default lib64 to lib to workaround a defect
# in our generated pkg-config file
RUN cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_LIBDIR=lib .. && cmake --build . --target install
# download the gotestsum v0.3.1 binary into /usr/local/bin
RUN curl -sSL "https://github.com/gotestyourself/gotestsum/releases/download/v0.3.1/gotestsum_0.3.1_linux_amd64.tar.gz" | tar -xz -C /usr/local/bin gotestsum

WORKDIR /app
# copy the module files and download the dependencies before copying the sources, so the
# download layer is only rebuilt when go.mod / go.sum change
COPY go.mod .
COPY go.sum .
RUN go mod download
COPY . .
# build with the seabolt_static tag
RUN GOOS=linux go build --tags seabolt_static -o neo4j_posts-inserter insta/inserter/neo4j/posts/main.go

# Runtime stage: alpine with CA certificates, useragents.json and the compiled binary.
FROM alpine
RUN apk --no-cache add ca-certificates
RUN mkdir /app
COPY http_header-generator/useragents.json /app
COPY --from=builder /app/neo4j_posts-inserter /app
WORKDIR /app
CMD ["./neo4j_posts-inserter"]
26 |
--------------------------------------------------------------------------------
/insta/inserter/neo4j/user/Dockerfile:
--------------------------------------------------------------------------------
# Build stage: builds and installs the seabolt connector, then compiles the user inserter.
FROM golang:1.13-alpine as builder
RUN apk add --no-cache ca-certificates cmake make g++ openssl-dev openssl-libs-static git curl pkgconfig
# clone the seabolt source code at tag v1.7.4
RUN git clone -b v1.7.4 https://github.com/neo4j-drivers/seabolt.git /seabolt
# invoke cmake build and install artifacts - default location is /usr/local
WORKDIR /seabolt/build
# CMAKE_INSTALL_LIBDIR=lib is a hack where we override default lib64 to lib to workaround a defect
# in our generated pkg-config file
RUN cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_LIBDIR=lib .. && cmake --build . --target install
# download the gotestsum v0.3.1 binary into /usr/local/bin
RUN curl -sSL "https://github.com/gotestyourself/gotestsum/releases/download/v0.3.1/gotestsum_0.3.1_linux_amd64.tar.gz" | tar -xz -C /usr/local/bin gotestsum

WORKDIR /app
# copy the module files and download the dependencies before copying the sources, so the
# download layer is only rebuilt when go.mod / go.sum change
COPY go.mod .
COPY go.sum .
RUN go mod download
COPY . .
# build with the seabolt_static tag
RUN GOOS=linux go build --tags seabolt_static -o neo4j_user-inserter insta/inserter/neo4j/user/main.go


# Runtime stage: alpine with CA certificates, useragents.json and the compiled binary.
FROM alpine
RUN apk --no-cache add ca-certificates
RUN mkdir /app
COPY http_header-generator/useragents.json /app
COPY --from=builder /app/neo4j_user-inserter /app
WORKDIR /app
CMD ["./neo4j_user-inserter"]
27 |
--------------------------------------------------------------------------------
/scraper-client/simple-scraper-client.go:
--------------------------------------------------------------------------------
1 | package client
2 |
3 | import (
4 | "net/http"
5 | "time"
6 |
7 | generator "github.com/codeuniversity/smag-mvp/http_header-generator"
8 | )
9 |
// SimpleScraperClient handles retries and setting random headers for scraping
type SimpleScraperClient struct {
	// NOTE(review): currentAddress and instanceID are not read or written anywhere in this
	// file — check the rest of the package before relying on them.
	currentAddress string
	// client performs the actual HTTP requests issued through Do.
	client     *http.Client
	instanceID string
	// The embedded generator provides AddHeaders, which Do uses on every request.
	*generator.HTTPHeaderGenerator
}
17 |
18 | // NewSimpleScraperClient returns an initialized SimpleScraperClient
19 | func NewSimpleScraperClient() *SimpleScraperClient {
20 | client := &SimpleScraperClient{}
21 | client.HTTPHeaderGenerator = generator.New()
22 | client.client = &http.Client{}
23 | return client
24 | }
25 |
26 | // WithRetries calls f with retries
27 | func (s *SimpleScraperClient) WithRetries(times int, f func() error) error {
28 | var err error
29 | for i := 0; i < times; i++ {
30 | err = f()
31 |
32 | if err == nil {
33 | return nil
34 | }
35 | time.Sleep(100 * time.Millisecond)
36 | }
37 | return err
38 | }
39 |
40 | // Do the request with correct headers
41 | func (s *SimpleScraperClient) Do(request *http.Request) (*http.Response, error) {
42 | s.AddHeaders(&request.Header)
43 | return s.client.Do(request)
44 | }
45 |
--------------------------------------------------------------------------------
/insta/inserter/neo4j/tagged_users/Dockerfile:
--------------------------------------------------------------------------------
# Build stage: builds and installs the seabolt connector, then compiles the tagged_users inserter.
FROM golang:1.13-alpine as builder
RUN apk add --no-cache ca-certificates cmake make g++ openssl-dev openssl-libs-static git curl pkgconfig
# clone the seabolt source code at tag v1.7.4
RUN git clone -b v1.7.4 https://github.com/neo4j-drivers/seabolt.git /seabolt
# invoke cmake build and install artifacts - default location is /usr/local
WORKDIR /seabolt/build
# CMAKE_INSTALL_LIBDIR=lib is a hack where we override default lib64 to lib to workaround a defect
# in our generated pkg-config file
RUN cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_LIBDIR=lib .. && cmake --build . --target install
# download the gotestsum v0.3.1 binary into /usr/local/bin
RUN curl -sSL "https://github.com/gotestyourself/gotestsum/releases/download/v0.3.1/gotestsum_0.3.1_linux_amd64.tar.gz" | tar -xz -C /usr/local/bin gotestsum

WORKDIR /app
# copy the module files and download the dependencies before copying the sources, so the
# download layer is only rebuilt when go.mod / go.sum change
COPY go.mod .
COPY go.sum .
RUN go mod download
COPY . .
# build with the seabolt_static tag
# NOTE(review): the binary is called neo4j_user-inserter although it is built from
# tagged_users/main.go — the name looks copied from the user inserter Dockerfile. It is
# used consistently below, so the image works; rename it if the name matters.
RUN GOOS=linux go build --tags seabolt_static -o neo4j_user-inserter insta/inserter/neo4j/tagged_users/main.go

# Runtime stage: alpine with CA certificates, useragents.json and the compiled binary.
FROM alpine
RUN apk --no-cache add ca-certificates
RUN mkdir /app
COPY http_header-generator/useragents.json /app
COPY --from=builder /app/neo4j_user-inserter /app
WORKDIR /app
CMD ["./neo4j_user-inserter"]
26 |
--------------------------------------------------------------------------------
/insta/filter/user_names/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 |
6 | "github.com/codeuniversity/smag-mvp/kafka/changestream"
7 | "github.com/codeuniversity/smag-mvp/service"
8 | "github.com/codeuniversity/smag-mvp/utils"
9 |
10 | "github.com/segmentio/kafka-go"
11 | )
12 |
13 | func main() {
14 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "my-kafka:9092")
15 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID")
16 | changesTopic := utils.GetStringFromEnvWithDefault("KAFKA_CHANGE_TOPIC", "postgres.public.users")
17 | namesTopic := utils.GetStringFromEnvWithDefault("KAFKA_NAME_TOPIC", "user_names")
18 |
19 | f := changestream.NewFilter(kafkaAddress, groupID, changesTopic, namesTopic, filterChange)
20 |
21 | service.CloseOnSignal(f)
22 | waitUntilClosed := f.Start()
23 |
24 | waitUntilClosed()
25 | }
26 |
// user holds the fields that filterChange decodes from the "after" payload of a change
// message: the JSON keys "id" and "user_name".
type user struct {
	ID       int    `json:"id"`
	UserName string `json:"user_name"`
}
31 |
32 | func filterChange(m *changestream.ChangeMessage) ([]kafka.Message, error) {
33 | if m.Payload.Op != "c" {
34 | return nil, nil
35 | }
36 |
37 | u := &user{}
38 | err := json.Unmarshal(m.Payload.After, u)
39 | if err != nil {
40 | return nil, err
41 | }
42 |
43 | return []kafka.Message{{Value: []byte(u.UserName)}}, nil
44 | }
45 |
--------------------------------------------------------------------------------
/twitter/filter/user_names/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 |
6 | "github.com/codeuniversity/smag-mvp/kafka/changestream"
7 | "github.com/codeuniversity/smag-mvp/service"
8 | "github.com/codeuniversity/smag-mvp/utils"
9 | "github.com/segmentio/kafka-go"
10 | )
11 |
12 | func main() {
13 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "my-kafka:9092")
14 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID")
15 | changesTopic := utils.GetStringFromEnvWithDefault("KAFKA_CHANGE_TOPIC", "postgres.public.twitter_users")
16 | namesTopic := utils.GetStringFromEnvWithDefault("KAFKA_NAME_TOPIC", "twitter-user_names")
17 |
18 | f := changestream.NewFilter(kafkaAddress, groupID, changesTopic, namesTopic, filterChange)
19 |
20 | service.CloseOnSignal(f)
21 | waitUntilClose := f.Start()
22 |
23 | waitUntilClose()
24 | }
25 |
// user holds the fields that filterChange decodes from the "after" payload of a change
// message: the JSON keys "id" and "username".
type user struct {
	ID       int    `json:"id"`
	Username string `json:"username"`
}
30 |
31 | func filterChange(m *changestream.ChangeMessage) ([]kafka.Message, error) {
32 | if m.Payload.Op != "c" {
33 | return nil, nil
34 | }
35 |
36 | u := &user{}
37 | err := json.Unmarshal(m.Payload.After, u)
38 | if err != nil {
39 | return nil, err
40 | }
41 |
42 | return []kafka.Message{{Value: []byte(u.Username)}}, nil
43 | }
44 |
--------------------------------------------------------------------------------
/nlp/frequency-analyzer/cities.json:
--------------------------------------------------------------------------------
1 | {
2 | "berlin": [
3 | "berlin",
4 | "BLN",
5 | "Brandenburg Gate",
6 | "Brandenburger Tor",
7 | "Bundestag"
8 | ],
9 | "london": [
10 | "london",
11 | "LDN",
12 | "Big Ben",
13 | "Tower Bridge"
14 | ],
15 | "munich": [
16 | "munich",
17 | "münchen",
18 | "MUC"
19 | ],
20 | "hamburg": [
21 | "hamburg",
22 | "⚓"
23 | ],
24 | "paris": [
25 | "paris",
26 | "Eiffel Tower",
27 | "Eiffelturm",
28 | "cdg"
29 | ],
30 | "rome": [
31 | "rome",
32 | "rom",
33 | "colosseum"
34 | ],
35 | "amsterdam": [
36 | "amsterdam",
37 | "AMS"
38 | ],
39 | "barcelona": [
40 | "barcelona",
41 | "BCN"
42 | ],
43 | "copenhagen": [
44 | "copenhagen",
45 | "kopenhagen",
46 | "cph"
47 | ],
48 | "lisbon": [
49 | "lisbon",
50 | "lissabon",
51 | "lis"
52 | ],
53 | "vienna": [
54 | "vienna",
55 | "wien",
56 | "VIE"
57 | ],
58 | "prague": [
59 | "prague",
60 | "prag",
61 | "PRG"
62 | ],
63 | "madrid": [
64 | "madrid",
65 | "mad"
66 | ]
67 | }
--------------------------------------------------------------------------------
/cli/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "os"
7 | "time"
8 |
9 | "github.com/codeuniversity/smag-mvp/utils"
10 | "github.com/segmentio/kafka-go"
11 | )
12 |
13 | func main() {
14 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "my-kafka:9092")
15 | instagramTopic := utils.GetStringFromEnvWithDefault("KAFKA_INSTAGRAM_TOPIC", "user_names")
16 | twitterTopic := utils.GetStringFromEnvWithDefault("KAFKA_TWITTER_TOPIC", "twitter.scraped.user_names")
17 |
18 | if len(os.Args) < 3 {
19 | panic("Invalid argumemts. Usage: cli ")
20 | }
21 |
22 | platformArg := os.Args[1]
23 | userNameArg := os.Args[2]
24 |
25 | var topic string
26 | switch platformArg {
27 | case "instagram":
28 | topic = instagramTopic
29 | break
30 | case "twitter":
31 | topic = twitterTopic
32 | break
33 | default:
34 | panic(fmt.Sprintf("Invalid platform option: %s\n", platformArg))
35 | }
36 |
37 | w := kafka.NewWriter(kafka.WriterConfig{
38 | Brokers: []string{kafkaAddress},
39 | Topic: topic,
40 | Balancer: &kafka.LeastBytes{},
41 | })
42 | defer w.Close()
43 | t, cancel := context.WithTimeout(context.Background(), time.Second*10)
44 | defer cancel()
45 | err := w.WriteMessages(t, kafka.Message{
46 | Value: []byte(userNameArg),
47 | })
48 | utils.PanicIfNotNil(err)
49 | }
50 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/codeuniversity/smag-mvp
2 |
3 | go 1.13
4 |
5 | require (
6 | github.com/PuerkitoBio/goquery v1.5.0 // indirect
7 | github.com/antchfx/htmlquery v1.1.0 // indirect
8 | github.com/antchfx/xmlquery v1.1.0 // indirect
9 | github.com/antchfx/xpath v1.1.0 // indirect
10 | github.com/aws/aws-sdk-go v1.25.16
11 | github.com/elastic/go-elasticsearch/v7 v7.4.1
12 | github.com/go-ini/ini v1.51.0 // indirect
13 | github.com/gobwas/glob v0.2.3 // indirect
14 | github.com/gocolly/colly v1.2.0
15 | github.com/golang/protobuf v1.3.2
16 | github.com/google/uuid v1.1.1
17 | github.com/jinzhu/gorm v1.9.11
18 | github.com/johnnadratowski/golang-neo4j-bolt-driver v0.0.0-20181101021923-6b24c0085aae
19 | github.com/kennygrant/sanitize v1.2.4 // indirect
20 | github.com/kr/pretty v0.1.0 // indirect
21 | github.com/lib/pq v1.2.0
22 | github.com/minio/minio-go v6.0.14+incompatible
23 | github.com/minio/minio-go/v6 v6.0.39
24 | github.com/neo4j-drivers/gobolt v1.7.4 // indirect
25 | github.com/neo4j/neo4j-go-driver v1.7.4
26 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
27 | github.com/segmentio/kafka-go v0.3.4
28 | github.com/stretchr/testify v1.4.0
29 | github.com/temoto/robotstxt v1.1.1 // indirect
30 | gocv.io/x/gocv v0.21.0
31 | golang.org/x/net v0.0.0-20191014212845-da9a3fd4c582
32 | google.golang.org/grpc v1.24.0
33 | )
34 |
--------------------------------------------------------------------------------
/insta/models/faces.go:
--------------------------------------------------------------------------------
1 | package models
2 |
3 | import (
4 | "github.com/jinzhu/gorm"
5 | "github.com/jinzhu/gorm/dialects/postgres"
6 | )
7 |
// FaceData represents the face encoding table
type FaceData struct {
	// gorm.Model is embedded, adding gorm's standard model fields.
	gorm.Model
	// PostID is serialized as "post_id"; presumably the post the face was found in — confirm.
	PostID int `json:"post_id"`
	// X, Y, Width and Height presumably describe the face's bounding box inside the
	// post image — confirm against the recognizer.
	X      int `json:"x"`
	Y      int `json:"y"`
	Width  int `json:"width"`
	Height int `json:"height"`
	// Encoding holds the face encoding as a postgres JSONB value.
	Encoding postgres.Jsonb `json:"encoding"`
}
18 |
// FaceRecognitionResult is the result of the face recognizer
type FaceRecognitionResult struct {
	// PostID is serialized as "post_id".
	PostID int `json:"post_id"`
	// Faces lists the recognized faces, serialized as "faces".
	Faces []*Face `json:"faces"`
}
24 |
// FaceReconJob represents the data for a face recon job
type FaceReconJob struct {
	// PostID is serialized as "post_id".
	PostID int `json:"post_id"`
	// InternalImageURL is serialized as "internal_image_url"; presumably the location of
	// the image the job refers to — confirm against the producer.
	InternalImageURL string `json:"internal_image_url"`
	// X, Y, Width and Height presumably describe a region of that image — confirm.
	X      int `json:"x"`
	Y      int `json:"y"`
	Width  int `json:"width"`
	Height int `json:"height"`
}
34 |
// Face contains the position of a face in a post and its encoding
type Face struct {
	// X, Y, Width and Height give the position of the face.
	X      int `json:"x"`
	Y      int `json:"y"`
	Width  int `json:"width"`
	Height int `json:"height"`
	// Encoding is the face encoding, a fixed-size array of 128 float32 values.
	Encoding [128]float32 `json:"encoding"`
}
43 |
--------------------------------------------------------------------------------
/face-recognition/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | recognition "github.com/codeuniversity/smag-mvp/face-recognition"
5 | "github.com/codeuniversity/smag-mvp/kafka"
6 | "github.com/codeuniversity/smag-mvp/service"
7 | "github.com/codeuniversity/smag-mvp/utils"
8 | )
9 |
10 | func main() {
11 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "127.0.0.1:9092")
12 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID")
13 | jobsReadTopic := utils.MustGetStringFromEnv("KAFKA_PICTURE_FACE_RECONED_TOPIC")
14 | jobsWriteTopic := utils.MustGetStringFromEnv("KAFKA_FACE_DETECTION_RESULTS_TOPIC")
15 | faceRecognizerAddress := utils.MustGetStringFromEnv("FACE_RECOGNIZER_ADDRESS")
16 | pictureBucketName := utils.MustGetStringFromEnv("S3_PICTURE_BUCKET_NAME")
17 | imgProxyAddress := utils.MustGetStringFromEnv("IMGPROXY_ADDRESS")
18 | imgProxyKey := utils.MustGetStringFromEnv("IMGPROXY_KEY")
19 | imgProxySalt := utils.MustGetStringFromEnv("IMGPROXY_SALT")
20 | qReader := kafka.NewReader(kafka.NewReaderConfig(kafkaAddress, groupID, jobsReadTopic))
21 | qWriter := kafka.NewWriter(kafka.NewWriterConfig(kafkaAddress, jobsWriteTopic, true))
22 |
23 | r := recognition.New(qReader, qWriter, faceRecognizerAddress, pictureBucketName, imgProxyAddress, imgProxyKey, imgProxySalt)
24 |
25 | service.CloseOnSignal(r)
26 | waitUntilDone := r.Start()
27 |
28 | waitUntilDone()
29 | }
30 |
--------------------------------------------------------------------------------
/insta/inserter/neo4j/user/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 |
6 | "github.com/codeuniversity/smag-mvp/kafka"
7 | "github.com/codeuniversity/smag-mvp/kafka/changestream"
8 | neo4jinserter "github.com/codeuniversity/smag-mvp/neo4j/inserter"
9 | "github.com/codeuniversity/smag-mvp/service"
10 | "github.com/codeuniversity/smag-mvp/utils"
11 |
12 | "github.com/neo4j/neo4j-go-driver/neo4j"
13 | )
14 |
15 | func main() {
16 | readerConfig := kafka.GetInserterConfig()
17 | neo4jConfig := utils.GetNeo4jConfig()
18 |
19 | i := neo4jinserter.New(neo4jConfig, kafka.NewReader(readerConfig), insertUsersAndFollowings)
20 |
21 | service.CloseOnSignal(i)
22 | waitUntilClosed := i.Start()
23 |
24 | waitUntilClosed()
25 | }
26 |
// Follow holds the fields decoded from the "after" payload of a change message: the JSON
// keys "from_id" and "to_id". insertUsersAndFollowings stores it as a FOLLOWS relationship
// from the user with id FromID to the user with id ToID.
type Follow struct {
	FromID int `json:"from_id"`
	ToID   int `json:"to_id"`
}
31 |
32 | func insertUsersAndFollowings(m *changestream.ChangeMessage, session neo4j.Session) error {
33 | const createUsersAndRelationships = `
34 | MERGE(u1:USER{id: $fromID})
35 | MERGE(u2:USER{id: $toID})
36 | MERGE(u1)-[:FOLLOWS]->(u2)
37 | `
38 | f := &Follow{}
39 | err := json.Unmarshal(m.Payload.After, f)
40 |
41 | if err != nil {
42 | return err
43 | }
44 |
45 | _, err = session.Run(createUsersAndRelationships, map[string]interface{}{"fromID": f.FromID, "toID": f.ToID})
46 |
47 | if err != nil {
48 | return err
49 | }
50 |
51 | return nil
52 | }
53 |
--------------------------------------------------------------------------------
/insta/inserter/neo4j/posts/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 |
6 | "github.com/codeuniversity/smag-mvp/kafka"
7 | "github.com/codeuniversity/smag-mvp/kafka/changestream"
8 | neo4jinserter "github.com/codeuniversity/smag-mvp/neo4j/inserter"
9 | "github.com/codeuniversity/smag-mvp/service"
10 | "github.com/codeuniversity/smag-mvp/utils"
11 |
12 | "github.com/neo4j/neo4j-go-driver/neo4j"
13 | )
14 |
15 | func main() {
16 | readerConfig := kafka.GetInserterConfig()
17 | neo4jConfig := utils.GetNeo4jConfig()
18 |
19 | i := neo4jinserter.New(neo4jConfig, kafka.NewReader(readerConfig), insertPostsAndAddRelationship)
20 |
21 | service.CloseOnSignal(i)
22 | waitUntilClosed := i.Start()
23 |
24 | waitUntilClosed()
25 | }
26 |
// Post holds the fields decoded from the "after" payload of a change message: the JSON keys
// "user_id" and "id". insertPostsAndAddRelationship stores it as a POSTED relationship from
// the user with id UserID to the post with id PostID.
type Post struct {
	UserID int `json:"user_id"`
	PostID int `json:"id"`
}
31 |
32 | func insertPostsAndAddRelationship(m *changestream.ChangeMessage, session neo4j.Session) error {
33 | const insertPostsAndAddRelationship = `
34 | MERGE(u:USER{id: $userID})
35 | MERGE(p:POST{id: $postID})
36 | MERGE(u)-[:POSTED]->(p)
37 | `
38 | p := &Post{}
39 | err := json.Unmarshal(m.Payload.After, p)
40 |
41 | if err != nil {
42 | return err
43 | }
44 |
45 | _, err = session.Run(insertPostsAndAddRelationship, map[string]interface{}{"userID": p.UserID, "postID": p.PostID})
46 |
47 | if err != nil {
48 | return err
49 | }
50 |
51 | return nil
52 | }
53 |
--------------------------------------------------------------------------------
/elastic/mapping.go:
--------------------------------------------------------------------------------
1 | package elastic
2 |
// CommentsIndexMapping is the JSON index mapping for comments: a text field "comment" and
// an integer field "post_id".
const CommentsIndexMapping = `
{
"mappings" : {
"properties" : {
"comment" : {
"type" : "text"
},
"post_id" : {
"type" : "integer"
}
}
}
}
`
17 |
// FacesIndexMapping is the JSON index mapping for faces: a binary field "encoding_vector"
// with doc_values enabled and the integer fields "post_id", "x", "y", "width" and "height".
const FacesIndexMapping = `
{
"mappings" : {
"properties" : {
"encoding_vector": {
"type": "binary",
"doc_values": true
},
"post_id": {
"type": "integer"
},
"x": {
"type": "integer"
},
"y": {
"type": "integer"
},
"width": {
"type": "integer"
},
"height":{
"type": "integer"
}
}
}
}
`
45 |
// PostsIndexMapping is the JSON index mapping for posts: a text field "caption" and an
// integer field "user_id".
const PostsIndexMapping = `
{
"mappings" : {
"properties" : {
"caption" : {
"type" : "text"
},
"user_id" : {
"type" : "integer"
}
}
}
}
`
60 |
// UsersIndexMapping is the JSON index mapping for users: an integer field "id" and the
// text fields "user_name", "real_name" and "bio".
//
// The properties must be separated by commas; without them the document is not valid JSON.
const UsersIndexMapping = `
{
	"mappings" : {
		"properties" : {
			"id": {
				"type": "integer"
			},
			"user_name": {
				"type": "text"
			},
			"real_name": {
				"type": "text"
			},
			"bio": {
				"type": "text"
			}
		}
	}
}
`
81 |
--------------------------------------------------------------------------------
/insta/inserter/neo4j/tagged_users/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 |
6 | "github.com/codeuniversity/smag-mvp/kafka"
7 | "github.com/codeuniversity/smag-mvp/kafka/changestream"
8 | neo4jinserter "github.com/codeuniversity/smag-mvp/neo4j/inserter"
9 | "github.com/codeuniversity/smag-mvp/service"
10 | "github.com/codeuniversity/smag-mvp/utils"
11 |
12 | "github.com/neo4j/neo4j-go-driver/neo4j"
13 | )
14 |
15 | func main() {
16 |
17 | readerConfig := kafka.GetInserterConfig()
18 | neo4jConfig := utils.GetNeo4jConfig()
19 |
20 | i := neo4jinserter.New(neo4jConfig, kafka.NewReader(readerConfig), addTaggedUsersRelationship)
21 |
22 | service.CloseOnSignal(i)
23 | waitUntilClosed := i.Start()
24 |
25 | waitUntilClosed()
26 | }
27 |
// taggedUser is the row payload decoded from a change message: the id of the
// tagged user and the id of the post the user was tagged on.
type taggedUser struct {
	UserID int `json:"user_id"`
	PostID int `json:"post_id"`
}
32 |
33 | func addTaggedUsersRelationship(m *changestream.ChangeMessage, session neo4j.Session) error {
34 | const addTaggedRelationship = `
35 | MERGE(u:USER{id: $userID})
36 | MERGE(p:POST{id: $postID})
37 | MERGE(u)-[:TAGGED_ON]->(p)
38 | `
39 | t := &taggedUser{}
40 | err := json.Unmarshal(m.Payload.After, t)
41 |
42 | if err != nil {
43 | return err
44 | }
45 |
46 | _, err = session.Run(addTaggedRelationship, map[string]interface{}{"userID": t.UserID, "postID": t.PostID})
47 |
48 | if err != nil {
49 | return err
50 | }
51 |
52 | return nil
53 | }
54 |
--------------------------------------------------------------------------------
/kafka/changestream/change_message.go:
--------------------------------------------------------------------------------
1 | package changestream
2 |
3 | import (
4 | "encoding/json"
5 | )
6 |
// ChangeMessage mirrors the JSON layout of a single change-stream message:
// a self-describing Schema section and the Payload carrying the change itself.
// (Presumably the Debezium change-event envelope — confirm against the producer.)
type ChangeMessage struct {
	// Schema describes the (nested) fields of the message.
	Schema struct {
		Type   string `json:"type"`
		Fields []struct {
			Type   string `json:"type"`
			Fields []struct {
				Type     string `json:"type"`
				Optional bool   `json:"optional"`
				Field    string `json:"field"`
			} `json:"fields,omitempty"`
			Optional bool   `json:"optional"`
			Name     string `json:"name,omitempty"`
			Field    string `json:"field"`
		} `json:"fields"`
		Optional bool   `json:"optional"`
		Name     string `json:"name"`
	} `json:"schema"`
	// Payload holds the change. Before and After are kept as raw JSON so each
	// consumer can decode them into its own row type.
	Payload struct {
		Before json.RawMessage `json:"before"`
		After  json.RawMessage `json:"after"`
		// Source carries metadata about where the change originated.
		Source struct {
			Version   string      `json:"version"`
			Connector string      `json:"connector"`
			Name      string      `json:"name"`
			TsMs      int64       `json:"ts_ms"`
			Snapshot  string      `json:"snapshot"`
			Db        string      `json:"db"`
			Schema    string      `json:"schema"`
			Table     string      `json:"table"`
			TxID      int         `json:"txId"`
			Lsn       int         `json:"lsn"`
			Xmin      interface{} `json:"xmin"`
		} `json:"source"`
		// Op is the operation code; the filter tests in this repository use
		// "c" and "u".
		Op   string `json:"op"`
		TsMs int64  `json:"ts_ms"`
	} `json:"payload"`
}
45 |
--------------------------------------------------------------------------------
/twitter/scraper/twitterscraper/posts_scraper.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import twint
4 |
5 | from .scraper_manager import ScraperManager
6 | from .utils import get_conf
7 |
8 |
class TweetsScraper(ScraperManager):
    """ScraperManager specialisation that collects the tweets of one user."""

    name = "tweets_scraper"

    @staticmethod
    def scrape(user_name: str):
        """Run a twint search for ``user_name`` and return the collected tweets.

        The twint configuration is pointed at a fresh list, which twint fills
        while the search runs; that list is returned.
        """
        logging.info(f"Scrape tweets of user {user_name}")

        collected = []

        conf = get_conf(user_name)
        conf.Store_object_tweets_list = collected

        twint.run.Search(conf)
        return collected
23 |
24 |
if __name__ == "__main__":
    import os

    # DEBUG=true switches logging to debug level; anything else keeps INFO.
    debug_enabled = os.getenv("DEBUG", "false") == "true"
    log_level = logging.DEBUG if debug_enabled else logging.INFO

    logging.basicConfig(
        format="%(asctime)s.%(msecs)03d - %(module)s - %(levelname)s - %(message)s",
        datefmt="%H:%M:%S",
        level=log_level,
    )

    # NOTE(review): the default insert topic is "users_scraped", the same as in
    # the user scraper — confirm this is intended for tweets.
    tweets_scraper = TweetsScraper(
        insert_topic=os.getenv("KAFKA_INSERT_TOPIC", "users_scraped"),
        fetch_topic=os.getenv("KAFKA_FETCH_TOPIC", "user_names"),
        kafka_consumer_group=os.getenv("KAFKA_CONSUMER_GROUP", "tweets_scraper"),
        kafka_address=os.getenv("KAFKA_ADDRESS", "localhost:9092"),
    )
    tweets_scraper.run()
48 |
--------------------------------------------------------------------------------
/insta/posts_face-detection/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | detection "github.com/codeuniversity/smag-mvp/insta/posts_face-detection"
5 | "github.com/codeuniversity/smag-mvp/kafka"
6 | "github.com/codeuniversity/smag-mvp/service"
7 | "github.com/codeuniversity/smag-mvp/utils"
8 | )
9 |
10 | func main() {
11 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "127.0.0.1:9092")
12 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID")
13 | jobsReadTopic := utils.MustGetStringFromEnv("KAFKA_PICTURE_FACE_RECON_TOPIC")
14 | jobsWriteTopic := utils.MustGetStringFromEnv("KAFKA_PICTURE_FACE_RECONED_TOPIC")
15 | qReader := kafka.NewReader(kafka.NewReaderConfig(kafkaAddress, groupID, jobsReadTopic))
16 | qWriter := kafka.NewWriter(kafka.NewWriterConfig(kafkaAddress, jobsWriteTopic, true))
17 |
18 | config := detection.Config{
19 | S3BucketName: utils.GetStringFromEnvWithDefault("S3_BUCKET_NAME", "insta_pics"),
20 | S3Region: utils.GetStringFromEnvWithDefault("S3_REGION", "eu-west-1"),
21 | S3Endpoint: utils.GetStringFromEnvWithDefault("S3_ENDOINT", "127.0.0.1:9000"),
22 | S3AccessKeyID: utils.MustGetStringFromEnv("S3_ACCESS_KEY_ID"),
23 | S3SecretAccessKey: utils.MustGetStringFromEnv("S3_SECRET_ACCESS_KEY"),
24 | S3UseSSL: utils.GetBoolFromEnvWithDefault("S3_USE_SSL", true),
25 | }
26 |
27 | d := detection.New(qReader, qWriter, config)
28 |
29 | service.CloseOnSignal(d)
30 | waitUntilDone := d.Start()
31 |
32 | waitUntilDone()
33 | }
34 |
--------------------------------------------------------------------------------
/twitter/scraper/twitterscraper/users_scraper.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import twint
4 |
5 | from .scraper_manager import ScraperManager
6 | from .utils import get_conf
7 |
8 |
def scrape(user_name: str) -> twint.user.user:
    """Look up ``user_name`` with twint and return the scraped user.

    twint appends lookup results to the module-level ``twint.output.users_list``;
    the most recently appended entry is popped off and returned.
    """
    twint.run.Lookup(get_conf(user_name))
    return twint.output.users_list.pop()
14 |
15 |
class UserScraper(ScraperManager):
    """ScraperManager specialisation that scrapes a single user profile."""

    name = "user_scraper"

    @staticmethod
    def scrape(user_name: str):
        """Log the request and delegate to the module-level ``scrape``."""
        logging.info(f"Scrape user {user_name}")
        return scrape(user_name)
24 |
25 |
if __name__ == "__main__":
    import os

    # DEBUG=true switches logging to debug level; anything else keeps INFO.
    debug_enabled = os.getenv("DEBUG", "false") == "true"
    log_level = logging.DEBUG if debug_enabled else logging.INFO

    logging.basicConfig(
        format="%(asctime)s.%(msecs)03d - %(module)s - %(levelname)s - %(message)s",
        datefmt="%H:%M:%S",
        level=log_level,
    )

    # Kafka wiring comes from the environment, with local defaults.
    user_scraper = UserScraper(
        insert_topic=os.getenv("KAFKA_INSERT_TOPIC", "users_scraped"),
        fetch_topic=os.getenv("KAFKA_FETCH_TOPIC", "user_names"),
        kafka_consumer_group=os.getenv("KAFKA_CONSUMER_GROUP", "user_scraper"),
        kafka_address=os.getenv("KAFKA_ADDRESS", "localhost:9092"),
    )
    user_scraper.run()
50 |
--------------------------------------------------------------------------------
/insta/filter/user_names/filter_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/codeuniversity/smag-mvp/kafka/changestream"
7 | "github.com/stretchr/testify/assert"
8 | )
9 |
10 | func TestFilter(t *testing.T) {
11 | validPayloadJSON := []byte("{\"id\":1,\"user_name\":\"TestUser\"}")
12 | invalidPayloadJSON := []byte("{\"id\":\"1\",\"user_name\":\"TestUser\"}")
13 |
14 | t.Run("create event with unmarshable json", func(t *testing.T) {
15 | //create test input
16 | changeMsg := &changestream.ChangeMessage{}
17 | changeMsg.Payload.Op = "c"
18 | changeMsg.Payload.After = validPayloadJSON
19 |
20 | kMessages, err := filterChange(changeMsg)
21 |
22 | assert.Nil(t, err, "no error")
23 | assert.Equal(t, 1, len(kMessages))
24 | assert.Equal(t, "TestUser", string(kMessages[0].Value))
25 | })
26 |
27 | t.Run("create event with not unmarshable json", func(t *testing.T) {
28 | //create test input
29 | changeMsg := &changestream.ChangeMessage{}
30 | changeMsg.Payload.Op = "c"
31 | changeMsg.Payload.After = invalidPayloadJSON
32 |
33 | kMessages, err := filterChange(changeMsg)
34 |
35 | assert.NotNil(t, err, "error occurs")
36 | assert.Nil(t, kMessages, "nil output")
37 | })
38 |
39 | t.Run("ignored event", func(t *testing.T) {
40 | //create test input
41 | changeMsg := &changestream.ChangeMessage{}
42 | changeMsg.Payload.Op = "u"
43 |
44 | kMessages, err := filterChange(changeMsg)
45 |
46 | assert.Nil(t, err, "no error")
47 | assert.Nil(t, kMessages, "nil output")
48 | })
49 | }
50 |
--------------------------------------------------------------------------------
/twitter/filter/user_names/filter_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/codeuniversity/smag-mvp/kafka/changestream"
7 | "github.com/stretchr/testify/assert"
8 | )
9 |
10 | func TestFilter(t *testing.T) {
11 | validPayloadJSON := []byte("{\"id\":1,\"username\":\"TestUser\"}")
12 | invalidPayloadJSON := []byte("{\"id\":\"1\",\"username\":\"TestUser\"}")
13 |
14 | t.Run("create event with unmarshable json", func(t *testing.T) {
15 | //create test input
16 | changeMsg := &changestream.ChangeMessage{}
17 | changeMsg.Payload.Op = "c"
18 | changeMsg.Payload.After = validPayloadJSON
19 |
20 | kMessages, err := filterChange(changeMsg)
21 |
22 | assert.Nil(t, err, "no error")
23 | assert.Equal(t, 1, len(kMessages))
24 | assert.Equal(t, "TestUser", string(kMessages[0].Value))
25 | })
26 |
27 | t.Run("create event with not unmarshable json", func(t *testing.T) {
28 | //create test input
29 | changeMsg := &changestream.ChangeMessage{}
30 | changeMsg.Payload.Op = "c"
31 | changeMsg.Payload.After = invalidPayloadJSON
32 |
33 | kMessages, err := filterChange(changeMsg)
34 |
35 | assert.NotNil(t, err, "error occurs")
36 | assert.Nil(t, kMessages, "nil output")
37 | })
38 |
39 | t.Run("ignored event", func(t *testing.T) {
40 | //create test input
41 | changeMsg := &changestream.ChangeMessage{}
42 | changeMsg.Payload.Op = "u"
43 |
44 | kMessages, err := filterChange(changeMsg)
45 |
46 | assert.Nil(t, err, "no error")
47 | assert.Nil(t, kMessages, "nil output")
48 | })
49 | }
50 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | # Description
2 |
3 | Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change.
4 |
5 | Fixes # (issue)
6 |
7 | ## Type of change
8 |
9 | - [ ] Bug fix (non-breaking change which fixes an issue)
10 | - [ ] New feature (non-breaking change which adds functionality)
11 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
12 | - [ ] This change requires a documentation update
13 | - [ ] Cleanup (changes in structure but not functionality)
14 |
15 | # How Has This Been Tested?
16 |
17 | Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration.
18 |
19 | - [ ] Test A
20 | - [ ] Test B
21 |
22 | **Test Configuration**:
23 | * Go version:
24 | * Kafka version:
25 | * Kubernetes version:
26 |
27 | # Checklist:
28 |
29 | - [ ] My code follows the style guidelines of this project
30 | - [ ] I have performed a self-review of my own code
31 | - [ ] I have commented my code, particularly in hard-to-understand areas
32 | - [ ] I have made corresponding changes to the documentation
33 | - [ ] My changes generate no new warnings
34 | - [ ] I have added tests that prove my fix is effective or that my feature works
35 | - [ ] New and existing unit tests pass locally with my changes
36 | - [ ] Any dependent changes have been merged and published in downstream modules
37 |
--------------------------------------------------------------------------------
/frontend/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "react-tutorial",
3 | "version": "0.1.0",
4 | "private": true,
5 | "dependencies": {
6 | "@fortawesome/fontawesome-svg-core": "^1.2.25",
7 | "@fortawesome/free-solid-svg-icons": "^5.11.2",
8 | "@fortawesome/react-fontawesome": "^0.1.7",
9 | "@improbable-eng/grpc-web": "^0.11.0",
10 | "@types/google-protobuf": "^3.7.2",
11 | "bootstrap": "^4.3.1",
12 | "google-protobuf": "^3.11.0",
13 | "grpc-web": "^1.0.6",
14 | "lodash": "^4.17.15",
15 | "neovis.js": "^1.2.1",
16 | "prop-types": "^15.7.2",
17 | "react": "^16.10.1",
18 | "react-bootstrap": "^1.0.0-beta.12",
19 | "react-dom": "^16.10.1",
20 | "react-graph-vis": "^1.0.5",
21 | "react-router": "^5.1.2",
22 | "react-router-dom": "^5.1.2",
23 | "react-scripts": "3.3.0",
24 | "react-slideshow-image": "^1.3.1"
25 | },
26 | "scripts": {
27 | "start": "react-scripts start",
28 | "build": "react-scripts build",
29 | "test": "react-scripts test",
30 | "eject": "react-scripts eject"
31 | },
32 | "eslintConfig": {
33 | "extends": "react-app"
34 | },
35 | "browserslist": {
36 | "production": [
37 | ">0.2%",
38 | "not dead",
39 | "not op_mini all"
40 | ],
41 | "development": [
42 | "last 1 chrome version",
43 | "last 1 firefox version",
44 | "last 1 safari version"
45 | ]
46 | },
47 | "husky": {
48 | "hooks": {
49 | "pre-commit": "pretty-quick --staged"
50 | }
51 | },
52 | "devDependencies": {
53 | "husky": "^3.1.0",
54 | "prettier": "1.19.1",
55 | "pretty-quick": "^2.0.1"
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/faces/server.py:
--------------------------------------------------------------------------------
1 |
2 | from concurrent import futures
3 | import time
4 | import grpc
5 | import os
6 | import prometheus_client
7 |
8 | import recognizer_pb2_grpc as grpc_proto
9 | import recognizer_pb2 as proto
10 | import recognizer
11 | import metrics
12 |
13 |
class Servicer(grpc_proto.FaceRecognizerServicer):
    """gRPC servicer that answers RecognizeFaces requests."""

    # Latency histogram child for this endpoint; it times every call below.
    RECOGNIZE_HISTOGRAM = metrics.request_latency_histogram.labels(
        'recognize_faces')

    @RECOGNIZE_HISTOGRAM.time()
    def RecognizeFaces(self, request, context):
        """Recognize the faces in the image at ``request.url``.

        Each recognized face is converted into a ``proto.Face`` carrying its
        bounding box and encoding. The request counter is incremented once the
        faces have been converted.
        """
        recognized = recognizer.recognize(request.url)
        proto_faces = [
            proto.Face(x=area['x'], y=area['y'], width=area['width'],
                       height=area['height'], encoding=encoding)
            for area, encoding in (
                (face['area'], face['encoding']) for face in recognized
            )
        ]

        metrics.request_counter.labels('recognize_faces').inc()

        return proto.RegognizeResponse(faces=proto_faces)
33 |
34 |
def serve():
    """Start the gRPC server on ``GRPC_PORT`` and block until interrupted.

    The server runs on a thread pool of 10 workers. The calling thread only
    sleeps in day-long intervals; a KeyboardInterrupt stops the server
    immediately (grace period 0).
    """
    executor = futures.ThreadPoolExecutor(max_workers=10)
    server = grpc.server(executor)
    grpc_proto.add_FaceRecognizerServicer_to_server(Servicer(), server)

    listen_address = '[::]:' + os.environ['GRPC_PORT']
    server.add_insecure_port(listen_address)
    server.start()

    one_day_in_seconds = 60 * 60 * 24
    try:
        while True:
            time.sleep(one_day_in_seconds)
    except KeyboardInterrupt:
        server.stop(0)
53 |
54 |
if __name__ == '__main__':
    # Expose the prometheus metrics endpoint before serving gRPC requests.
    metrics_port = int(os.environ['METRICS_PORT'])
    prometheus_client.start_http_server(metrics_port)
    serve()
58 |
--------------------------------------------------------------------------------
/db/README.md:
--------------------------------------------------------------------------------
1 | # postgres database
2 |
3 | We are using [POSTGRESQL](https://www.postgresql.org/) as the store for the raw scraped data from the various data sources.
4 | The schemas are quite similar to the scraped data structures.
5 |
6 | **Table of Contents**
7 |
8 | - [Instagram](#instagram)
9 | - [Remarks](#remarks)
10 | - [Twitter](#twitter)
11 | - [Debezium](#debezium)
12 |
13 | ## [Instagram](https://www.instagram.com/)
14 |
15 | This database is the more sophisticated one and is running in production.
16 |
17 | 
18 |
19 | ### Remarks
20 |
21 | - `internal_picture_url` is pointing to the downloaded picture on S3
22 |
23 | ## Twitter
24 |
25 | This database is not in production yet and at the moment only dumps the tweaked scraped data.
26 |
27 | 
28 |
29 | ## Debezium
30 |
31 | The [debezium](https://github.com/debezium/debezium) connector generates a change stream from all change events in postgres (`read`, `create`, `update`, `delete`) and writes them into a kafka topic named `postgres.public.<table_name>`, one topic per table (e.g. `postgres.public.users`).
32 |
33 | To read from this stream you can:
34 |
35 | - get [`kafkacat`](https://github.com/edenhill/kafkacat)
36 | - inspect the topic list in kafka:
37 | ```bash
38 | $ kafkacat -L -b my-kafka | grep 'topic "postgres'
39 | ```
40 | - consume a topic with
41 | ```bash
42 | $ kafkacat -b my-kafka -t <topic_name>
43 | ```
44 |
45 | The messages are quite verbose, since they include their own schema description. The most interesting part is the `value.payload`:
46 |
47 | ```bash
48 | $ kafkacat -b my-kafka -J -t postgres.public.users | jq '.value | fromjson | .payload'
49 | ```
50 |
--------------------------------------------------------------------------------
/faces/recognizer_pb2_grpc.py:
--------------------------------------------------------------------------------
1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
2 | import grpc
3 |
4 | import recognizer_pb2 as recognizer__pb2
5 |
6 |
class FaceRecognizerStub(object):
    # Client-side stub for the proto.FaceRecognizer service (generated code).
    # missing associated documentation comment in .proto file
    pass

    def __init__(self, channel):
        """Constructor.

        Binds ``RecognizeFaces`` to a unary-unary call on the given channel,
        serializing ``RecognizeRequest`` and deserializing ``RegognizeResponse``.

        Args:
          channel: A grpc.Channel.
        """
        self.RecognizeFaces = channel.unary_unary(
            '/proto.FaceRecognizer/RecognizeFaces',
            request_serializer=recognizer__pb2.RecognizeRequest.SerializeToString,
            response_deserializer=recognizer__pb2.RegognizeResponse.FromString,
        )
22 |
23 |
class FaceRecognizerServicer(object):
    # Server-side base class for the proto.FaceRecognizer service (generated
    # code); concrete servicers override RecognizeFaces.
    # missing associated documentation comment in .proto file
    pass

    def RecognizeFaces(self, request, context):
        # Default implementation: marks the call UNIMPLEMENTED on the context
        # and raises NotImplementedError.
        # missing associated documentation comment in .proto file
        pass
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')
34 |
35 |
def add_FaceRecognizerServicer_to_server(servicer, server):
    # Registers the servicer's RecognizeFaces method on the server as a
    # unary-unary handler under the 'proto.FaceRecognizer' service name.
    rpc_method_handlers = {
        'RecognizeFaces': grpc.unary_unary_rpc_method_handler(
            servicer.RecognizeFaces,
            request_deserializer=recognizer__pb2.RecognizeRequest.FromString,
            response_serializer=recognizer__pb2.RegognizeResponse.SerializeToString,
        ),
    }
    generic_handler = grpc.method_handlers_generic_handler(
        'proto.FaceRecognizer', rpc_method_handlers)
    server.add_generic_rpc_handlers((generic_handler,))
47 |
--------------------------------------------------------------------------------
/http_header-generator/generator.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "encoding/json"
5 | "io/ioutil"
6 | "math/rand"
7 | "net/http"
8 | "time"
9 | )
10 |
// HTTPHeaderGenerator generates headers for http requests for scraping.
type HTTPHeaderGenerator struct {
	// browserAgent is the pool of user agents to pick from.
	browserAgent browserAgent
}

// New returns an HTTPHeaderGenerator initialized with the user agents listed
// in http_header-generator/useragents.json (resolved relative to the working
// directory).
//
// It panics if the file cannot be read or does not contain the expected JSON.
// The global random source is seeded here, once per generator, instead of on
// every GetRandomUserAgent call.
func New() *HTTPHeaderGenerator {
	data, err := ioutil.ReadFile("http_header-generator/useragents.json")
	if err != nil {
		panic(err)
	}

	var userAgent browserAgent
	if err := json.Unmarshal(data, &userAgent); err != nil {
		panic(err)
	}

	rand.Seed(time.Now().UnixNano())

	return &HTTPHeaderGenerator{browserAgent: userAgent}
}

// browserAgent is the decoded user agent list: a JSON array of objects with a
// "useragent" key.
type browserAgent []struct {
	UserAgents string `json:"useragent"`
}

// AddHeaders adds the generated headers to the request headers.
// Existing values are kept; the new ones are appended via header.Add.
func (h *HTTPHeaderGenerator) AddHeaders(header *http.Header) {
	header.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3")
	header.Add("Accept-Charset", "utf-8")
	header.Add("Accept-Language", "de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7")
	header.Add("Cache-Control", "no-cache")
	header.Add("Content-Type", "application/json; charset=utf-8")
	header.Add("User-Agent", h.GetRandomUserAgent())
}

// GetRandomUserAgent returns a random user agent from the loaded list.
// It returns the empty string when no user agents are available, because
// rand.Intn panics for a non-positive argument.
func (h *HTTPHeaderGenerator) GetRandomUserAgent() string {
	if len(h.browserAgent) == 0 {
		return ""
	}
	return h.browserAgent[rand.Intn(len(h.browserAgent))].UserAgents
}
53 |
--------------------------------------------------------------------------------
/api/envoy-proxy/envoy.yaml:
--------------------------------------------------------------------------------
1 | admin:
2 | access_log_path: /tmp/admin_access.log
3 | address:
4 | socket_address: { address: 0.0.0.0, port_value: 9901 }
5 |
6 | static_resources:
7 | listeners:
8 | - name: listener_0
9 | address:
10 | socket_address: { address: 0.0.0.0, port_value: 8080 }
11 | filter_chains:
12 | - filters:
13 | - name: envoy.http_connection_manager
14 | config:
15 | codec_type: auto
16 | stat_prefix: ingress_http
17 | route_config:
18 | name: local_route
19 | virtual_hosts:
20 | - name: local_service
21 | domains: ["*"]
22 | routes:
23 | - match: { prefix: "/" }
24 | route:
25 | cluster: user_search_service
26 | max_grpc_timeout: 0s
27 | cors:
28 | allow_origin:
29 | - "*"
30 | allow_methods: GET, PUT, DELETE, POST, OPTIONS
31 | allow_headers: keep-alive,user-agent,cache-control,content-type,content-transfer-encoding,custom-header-1,x-accept-content-transfer-encoding,x-accept-response-streaming,x-user-agent,x-grpc-web,grpc-timeout
32 | max_age: "1728000"
33 | expose_headers: custom-header-1,grpc-status,grpc-message
34 | http_filters:
35 | - name: envoy.grpc_web
36 | - name: envoy.cors
37 | - name: envoy.router
38 | clusters:
39 | - name: user_search_service
40 | connect_timeout: 0.25s
41 | type: logical_dns
42 | http2_protocol_options: {}
43 | lb_policy: round_robin
44 | hosts: [{ socket_address: { address: grpc-server, port_value: 10000 }}]
45 |
--------------------------------------------------------------------------------
/insta/scraper/likes/insta-model.go:
--------------------------------------------------------------------------------
1 | package insta_likes_scraper
2 |
// InstaPostLikes mirrors the JSON response listing the likes of a post:
// the post (ShortcodeMedia) with its like count, paging information and one
// edge per liking user.
type InstaPostLikes struct {
	Data struct {
		ShortcodeMedia struct {
			ID          string `json:"id"`
			Shortcode   string `json:"shortcode"`
			EdgeLikedBy struct {
				Count int `json:"count"`
				// PageInfo carries the cursor for fetching the next page of likes.
				PageInfo struct {
					HasNextPage bool   `json:"has_next_page"`
					EndCursor   string `json:"end_cursor"`
				} `json:"page_info"`
				// Edges holds one entry per user who liked the post.
				Edges []struct {
					Node struct {
						ID                string `json:"id"`
						Username          string `json:"username"`
						FullName          string `json:"full_name"`
						ProfilePicURL     string `json:"profile_pic_url"`
						IsPrivate         bool   `json:"is_private"`
						IsVerified        bool   `json:"is_verified"`
						FollowedByViewer  bool   `json:"followed_by_viewer"`
						RequestedByViewer bool   `json:"requested_by_viewer"`
						Reel              struct {
							ID              string      `json:"id"`
							ExpiringAt      int         `json:"expiring_at"`
							HasPrideMedia   bool        `json:"has_pride_media"`
							LatestReelMedia interface{} `json:"latest_reel_media"`
							Seen            interface{} `json:"seen"`
							Owner           struct {
								Typename      string `json:"__typename"`
								ID            string `json:"id"`
								ProfilePicURL string `json:"profile_pic_url"`
								Username      string `json:"username"`
							} `json:"owner"`
						} `json:"reel"`
					} `json:"node"`
				} `json:"edges"`
			} `json:"edge_liked_by"`
		} `json:"shortcode_media"`
	} `json:"data"`
	Status string `json:"status"`
}
44 |
--------------------------------------------------------------------------------
/frontend/src/index.js:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import ReactDOM from "react-dom";
3 | import "./index.css";
4 | import { Route, Link, BrowserRouter as Router, Switch } from "react-router-dom";
5 | import App from "./App";
6 | import { BrowserRouter } from "react-router-dom";
7 | import Result from "./components/Result";
8 | import Dashboard from "./pages/Dashboard";
9 | import Notfound from "./notfound";
10 | import FlowWrapper from "./components/FlowWrapper";
11 | import Popup from "./components/Popup";
12 | import Greeting from "./pages/Greeting.jsx";
13 | import Network from "./pages/Network.jsx";
14 | import Endscreen from "./pages/endscreen";
15 | import SearchProfile from "./pages/SearchProfile";
16 | import ExampleProfileSelection from "./pages/ExampleProfileSelection";
17 |
18 | const root = document.getElementById("root");
19 |
20 | ReactDOM.render(
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
38 |
39 |
40 |
41 | ,
42 | root
43 | );
44 |
--------------------------------------------------------------------------------
/elastic/models/face.go:
--------------------------------------------------------------------------------
1 | package models
2 |
3 | import (
4 | "encoding/base64"
5 | "encoding/binary"
6 | "encoding/json"
7 | "math"
8 |
9 | "github.com/codeuniversity/smag-mvp/insta/models"
10 | )
11 |
// FaceDoc is the type that is used to store a face in elasticsearch.
// X, Y, Width and Height describe the face's bounding box; EncodingVector is
// the base64 string produced by EncodedVector.
type FaceDoc struct {
	PostID         int    `json:"post_id"`
	X              int    `json:"x"`
	Y              int    `json:"y"`
	Width          int    `json:"width"`
	Height         int    `json:"height"`
	EncodingVector string `json:"encoding_vector"`
}
21 |
22 | // FaceDocFromFaceData returns a FaceDoc with an encoded `EncodingVector` given a faceData model
23 | func FaceDocFromFaceData(faceData *models.FaceData) (*FaceDoc, error) {
24 | var encodingString string
25 | err := json.Unmarshal(faceData.Encoding.RawMessage, &encodingString)
26 | if err != nil {
27 | return nil, err
28 | }
29 | encoding := []float32{}
30 | err = json.Unmarshal([]byte(encodingString), &encoding)
31 | if err != nil {
32 | return nil, err
33 | }
34 | return &FaceDoc{
35 | PostID: faceData.PostID,
36 | X: faceData.X,
37 | Y: faceData.Y,
38 | Width: faceData.Width,
39 | Height: faceData.Height,
40 | EncodingVector: EncodedVector(encoding),
41 | }, nil
42 | }
43 |
// EncodedVector returns the encoded form of the given encoding, used for
// searching and looking up faces in elastic search.
//
// Every float32 is written as its 4-byte big-endian IEEE 754 bit pattern and
// the concatenated bytes are base64 (standard alphabet) encoded. An empty or
// nil encoding yields the empty string.
func EncodedVector(encoding []float32) string {
	// One buffer for the whole vector; each value is written in place rather
	// than through a per-element scratch slice.
	buf := make([]byte, 4*len(encoding))
	for i, v := range encoding {
		binary.BigEndian.PutUint32(buf[4*i:], math.Float32bits(v))
	}
	return base64.StdEncoding.EncodeToString(buf)
}
57 |
--------------------------------------------------------------------------------
/faces/recognizer.py:
--------------------------------------------------------------------------------
1 | import face_recognition
2 | import tempfile
3 | import os
4 | import requests
5 | import random
6 | import string
7 |
8 |
def recognize(url):
    """Detect the faces in the image at ``url``.

    Returns a list with one dict per detected face:
    ``{"area": {"x", "y", "width", "height"}, "encoding": <face encoding>}``.
    The area is derived from the ``(top, right, bottom, left)`` location tuple
    reported by ``face_recognition``. Returns an empty list when the image
    could not be downloaded.
    """
    image = download_and_read_image(url)
    if image is None:
        return []

    locations = face_recognition.face_locations(image)
    encodings = face_recognition.face_encodings(
        image, known_face_locations=locations)

    faces = []
    # Encodings are computed for the given locations, so the two sequences are
    # paired position by position.
    for (top, right, bottom, left), encoding in zip(locations, encodings):
        faces.append({
            "area": {
                "x": left,
                "y": top,
                "width": right - left,
                "height": bottom - top,
            },
            "encoding": encoding,
        })
    return faces
41 |
42 |
def download_and_read_image(url):
    """Download the image at ``url`` and load it with ``face_recognition``.

    The image is streamed into a randomly named ``.jpg`` file in the current
    working directory, loaded from there, and the file is removed again.

    Returns the loaded image, or ``None`` when the HTTP request did not
    succeed. The temporary file is removed on every path, including the
    failed-request path and when loading the image raises.
    """
    file_name = random_string() + ".jpg"
    try:
        with open(file_name, 'wb') as handle:
            response = requests.get(url, stream=True)
            try:
                if not response.ok:
                    print("failed to get file with name: " + file_name)
                    return None

                for block in response.iter_content(1024):
                    if not block:
                        break

                    handle.write(block)
            finally:
                # Release the streamed connection back to the pool.
                response.close()

        # Load only after the file handle is closed so all bytes are flushed.
        return face_recognition.load_image_file(file_name)
    finally:
        if os.path.exists(file_name):
            os.remove(file_name)
62 |
63 |
def random_string(n=12):
    """Return ``n`` random characters drawn from uppercase ASCII letters and digits."""
    alphabet = string.ascii_uppercase + string.digits
    picked = random.choices(alphabet, k=n)
    return ''.join(picked)
66 |
--------------------------------------------------------------------------------
/frontend/src/pages/SearchProfile.jsx:
--------------------------------------------------------------------------------
1 | import React, { Component } from "react";
2 | import Form from "../components/Form";
3 | import H2 from "../components/H2";
4 | import Button from "../components/Button";
5 | import Result from "../components/Result";
6 | import {
7 | User,
8 | UserNameRequest,
9 | UserIdRequest,
10 | Post,
11 | UserIdResponse,
12 | UserSearchResponse
13 | } from "../protofiles/usersearch_pb.js";
14 | import { UserSearchServiceClient } from "../protofiles/usersearch_grpc_web_pb";
15 | import { withRouter } from "react-router";
16 | import PropTypes from "prop-types";
17 |
18 | // eslint-disable-next-line
19 |
20 | class SearchProfile extends Component {
// Looks up the user with the entered user name through the API client and
// hands the result to `onProfileSelect` as a profile without faces
// (empty `facesList`, `weight` 0).
handleSubmit = async userName => {
  const { apiClient, onProfileSelect } = this.props;

  const request = new UserNameRequest();
  request.setUserName(userName);

  const response = await apiClient.getUserWithUsername(request);
  onProfileSelect({ facesList: [], weight: 0, user: response.toObject() });
};
31 | render() {
32 | return (
33 |
34 |
35 |
We couldn't find you. Please enter your instagram username.
36 |
37 |
38 |
41 |
42 |
43 | );
44 | }
45 | }
46 |
// Props used by SearchProfile:
// - apiClient: client object whose `getUserWithUsername` is called in handleSubmit
// - onProfileSelect: callback that receives the selected profile
// - history: router history injected by `withRouter`; `push` is a function,
//   so it is declared as `func` (declaring it as a shape fails validation
//   for the function the router actually passes)
SearchProfile.propTypes = {
  apiClient: PropTypes.shape({
    getUserWithUsername: PropTypes.func
  }),
  onProfileSelect: PropTypes.func,
  history: PropTypes.shape({
    push: PropTypes.func
  })
};
55 |
56 | export default withRouter(SearchProfile);
57 |
--------------------------------------------------------------------------------
/nlp/frequency-analyzer/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "io/ioutil"
7 | "log"
8 | "os"
9 | "strconv"
10 |
11 | analyzer "github.com/codeuniversity/smag-mvp/nlp/frequency-analyzer"
12 | )
13 |
14 | func main() {
15 | if len(os.Args) != 2 {
16 | log.Fatal("Num of args isn't 1.\nUsage: go run main.go ")
17 | }
18 | userID, err := strconv.Atoi(os.Args[1])
19 | if err != nil {
20 | log.Fatalf("Unable to convert user_id=%+v to integer.\nsage: go run main.go ", userID)
21 | }
22 |
23 | a := analyzer.New([]string{"http://127.0.0.1:9200"})
24 | log.Printf("a=%+v", a)
25 |
26 | // TODO: load cities.json
27 | jsonFile, err := os.Open("nlp/frequency-analyzer/cities.json")
28 | defer jsonFile.Close()
29 | if err != nil {
30 | log.Fatalln(err)
31 | }
32 | byteValue, err := ioutil.ReadAll(jsonFile)
33 | var cityMap map[string][]string
34 | if err := json.Unmarshal(byteValue, &cityMap); err != nil {
35 | panic(err)
36 | }
37 | log.Printf("cityMap=%+v", cityMap)
38 |
39 | foundCities := make(map[string]bool)
40 | for city, cityTerms := range cityMap {
41 | foundTerms, err := a.MatchTermsForUser(userID, cityTerms)
42 | if err != nil {
43 | panic(err)
44 | }
45 | log.Printf("city=%v \t-> foundTerms=%+v", city, foundTerms)
46 | // check if there are results for city
47 | if len(foundTerms) > 0 {
48 | foundCities[city] = true
49 | } else {
50 | foundCities[city] = false
51 | }
52 | }
53 | log.Printf("foundCities=%+v", foundCities)
54 |
55 | res := make([]string, 0, len(foundCities))
56 | log.Printf("Could identify following cities for user=%v: {", userID)
57 | for city, found := range foundCities {
58 | if found == true {
59 | res = append(res, city)
60 | fmt.Printf(" * %v\n", city)
61 | }
62 | }
63 | fmt.Println("}")
64 | }
65 |
--------------------------------------------------------------------------------
/frontend/src/pages/ExampleProfileSelection.jsx:
--------------------------------------------------------------------------------
1 | import React, { Component, useState } from "react";
2 | import { withRouter, history } from "react-router";
3 | import Button from "../components/Button";
4 | import H1 from "../components/H1";
5 | import H2 from "../components/H2";
6 | import {
7 | User,
8 | UserNameRequest,
9 | UserIdRequest,
10 | Post,
11 | UserIdResponse,
12 | UserSearchResponse
13 | } from "../protofiles/usersearch_pb.js";
14 | import { UserSearchServiceClient } from "../protofiles/usersearch_grpc_web_pb";
// Looks up the "codeuniversity" user through apiClient and resolves with the
// response converted to a plain object.
async function fetchExampleProfile(apiClient) {
  const request = new UserNameRequest();
  request.setUserName("codeuniversity");

  const reply = await apiClient.getUserWithUsername(request);
  return reply.toObject();
}
23 | function ExampleProfileSelection(props) {
24 | return (
25 |
26 |
27 |
We couldn't find any data of you.
28 |
29 |
30 | Would you like to continue the exhibition with the profile of CODE?
31 |
32 |
33 |
34 |
35 |
36 |
37 |
47 |
48 |
49 |
50 |
51 | );
52 | }
53 |
54 | export default ExampleProfileSelection;
55 |
--------------------------------------------------------------------------------
/frontend/src/App.js:
--------------------------------------------------------------------------------
1 | import React, { Component } from "react";
2 | import Form from "./components/Form";
3 | import H1 from "./components/H1";
4 | import Result from "./components/Result";
5 | import "./index.css";
6 | import {
7 | User,
8 | UserNameRequest,
9 | UserIdRequest,
10 | Post,
11 | UserIdResponse,
12 | UserSearchResponse
13 | } from "./protofiles/usersearch_pb.js";
14 | import { UserSearchServiceClient } from "./protofiles/usersearch_grpc_web_pb";
15 | import { withRouter } from "react-router";
16 | import PropTypes from "prop-types";
17 |
18 | // eslint-disable-next-line
19 |
20 | class App extends Component {
// Searches for userName and, on success, navigates to "/result" with the
// found user in the route state. Errors are logged and otherwise ignored.
handleSubmit = userName => {
  const client = new UserSearchServiceClient("http://localhost:4000");

  const request = new UserNameRequest();
  request.setUserName(userName);

  const onReply = (err, response) => {
    if (err) {
      console.log(err);
      return;
    }

    const user = {
      id: response.getId(),
      bio: response.getBio(),
      avatarurl: response.getAvatarUrl(),
      username: response.getUserName(),
      realname: response.getRealName()
    };

    this.props.history.push({ pathname: "/result", state: { user } });
  };

  client.getUserWithUsername(request, {}, onReply);
};
46 | render() {
47 | return (
48 |
49 |
50 |
Find out your public digital identity!
51 |
52 |
53 |
54 | );
55 | }
56 | }
57 |
// handleSubmit calls this.props.history.push({ pathname, state }), so push
// is a function; declaring it as a shape makes prop-types report a type
// mismatch in development.
App.propTypes = {
  history: PropTypes.shape({
    push: PropTypes.func
  })
};
66 |
67 | export default withRouter(App);
68 |
--------------------------------------------------------------------------------
/frontend/src/components/Start.js:
--------------------------------------------------------------------------------
1 | import React, { useState } from "react";
2 | import { CameraFeed } from "./camera-feed";
3 | import H1 from "./H1";
4 | import H2 from "./H2";
5 | import { FaceSearchRequest } from "../protofiles/usersearch_pb";
6 | import IGPost from "./IGPost";
7 |
// Returns an async handler that takes an image file, sends its base64 payload
// to apiClient.searchSimilarFaces and hands the resulting faces (as plain
// objects) to onFindFaces.
function findFacesInImage(apiClient, onFindFaces) {
  const toFace = protoFace => ({
    postId: protoFace.getPostId(),
    x: protoFace.getX(),
    y: protoFace.getY(),
    width: protoFace.getWidth(),
    height: protoFace.getHeight(),
    fullImageSrc: protoFace.getFullImageSrc()
  });

  return async file => {
    const reader = new FileReader();

    reader.onloadend = async () => {
      // keep only what follows the first comma of the data URL
      const [, base64Data] = reader.result.split(",");

      const request = new FaceSearchRequest();
      request.setBase64encodedpicture(base64Data);

      const response = await apiClient.searchSimilarFaces(request);
      onFindFaces(response.getFacesList().map(face => toFace(face)));
    };

    reader.readAsDataURL(file);
  };
}
32 |
33 | function Start({ apiClient, faceHits, addFaceHits, progress }) {
34 | const onFileSubmit = findFacesInImage(apiClient, addFaceHits);
35 |
36 | return (
37 |
38 |
39 |
40 | Are you aware that wherever you are recorded,
41 |
your identity can be found?
42 |
43 |
44 |
53 |
54 |
55 |
56 | );
57 | }
58 |
59 | export default Start;
60 |
--------------------------------------------------------------------------------
/imgproxy/urlbuilder.go:
--------------------------------------------------------------------------------
1 | package imgproxy
2 |
3 | import (
4 | "crypto/hmac"
5 | "crypto/sha256"
6 | "encoding/base64"
7 | "encoding/hex"
8 | "errors"
9 | "fmt"
10 | )
11 |
// URLBuilder assembles signed URLs for the imgproxy.
// It keeps the decoded signing key and salt next to the address of the proxy.
type URLBuilder struct {
	proxyAddress string
	keyBin       []byte
	saltBin      []byte
}

// New decodes the hex-encoded key and salt and returns a URLBuilder for the
// proxy at proxyAddress. It fails when key or salt is not valid hex.
func New(proxyAddress, key, salt string) (*URLBuilder, error) {
	decodedKey, keyErr := hex.DecodeString(key)
	if keyErr != nil {
		return nil, errors.New("Key expected to be hex-encoded string")
	}

	decodedSalt, saltErr := hex.DecodeString(salt)
	if saltErr != nil {
		return nil, errors.New("Salt expected to be hex-encoded string")
	}

	builder := &URLBuilder{
		proxyAddress: proxyAddress,
		keyBin:       decodedKey,
		saltBin:      decodedSalt,
	}
	return builder, nil
}

// GetCropURL returns a signed proxy url that requests a width x height crop of
// the image at sourceURL, using "nowe" gravity with the offsets x and y.
// The signature is the HMAC-SHA256 (keyed with the builder's key) of the salt
// followed by the request path, base64url-encoded and cut to 32 characters.
func (b *URLBuilder) GetCropURL(x, y, width, height int, sourceURL string) string {
	const extension = "jpg"

	source := base64.RawURLEncoding.EncodeToString([]byte(sourceURL))
	path := fmt.Sprintf("/crop:%d:%d:nowe:%d:%d/%s.%s", width, height, x, y, source, extension)

	signer := hmac.New(sha256.New, b.keyBin)
	signer.Write(b.saltBin)
	signer.Write([]byte(path))
	digest := signer.Sum(nil)
	signature := base64.RawURLEncoding.EncodeToString(digest)[:32]

	return "http://" + b.proxyAddress + "/" + signature + path
}

// GetS3Url returns a url in the form "s3://<bucketName>/<path>"
func (b *URLBuilder) GetS3Url(bucketName, path string) string {
	return "s3://" + bucketName + "/" + path
}
59 |
--------------------------------------------------------------------------------
/twitter/scraper/twitterscraper/following_scraper.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import twint
4 |
5 | from .scraper_manager import ScraperManager
6 | from .utils import get_conf, ShallowTwitterUser
7 |
8 |
def scrape(user_name: str) -> twint.user.user:
    """Build a ShallowTwitterUser for ``user_name`` and fill its ``following_list``.

    The list is whatever ``scrape_follows_list`` collects when it runs
    ``twint.run.Following`` with the config returned by ``get_conf(user_name)``.
    """
    shallow_user = ShallowTwitterUser(user_name)
    shallow_user.following_list = scrape_follows_list(
        twint.run.Following, get_conf(user_name)
    )
    return shallow_user
16 |
17 |
def scrape_follows_list(func, conf: twint.Config) -> list:
    """Call ``func(conf)`` and return everything it left in twint's output lists.

    Both ``twint.output.follows_list`` and ``twint.output.users_list`` are
    returned (in that order) and then reset to empty lists.
    """
    func(conf)

    # with conf.User_full = False the user names end up in follows_list,
    # with conf.User_full = True the user objects end up in users_list
    collected = [*twint.output.follows_list, *twint.output.users_list]

    twint.output.follows_list = []
    twint.output.users_list = []
    return collected
29 |
30 |
class FollowingScraper(ScraperManager):
    """ScraperManager whose scrape step collects the followings of a user."""

    name = "following_scraper"

    @staticmethod
    def scrape(user_name: str):
        """Log the request and delegate to the module-level ``scrape``."""
        logging.info(f"scrape user {user_name}s followings")
        return scrape(user_name)
39 |
40 |
if __name__ == "__main__":
    import os

    logging.basicConfig(
        level=logging.INFO,
        datefmt="%H:%M:%S",
        format="%(asctime)s.%(msecs)03d - %(module)s - %(levelname)s - %(message)s",
    )

    # Kafka settings are read from the environment, falling back to local defaults
    scraper_settings = {
        "insert_topic": os.getenv("KAFKA_INSERT_TOPIC", "users_scraped"),
        "fetch_topic": os.getenv("KAFKA_FETCH_TOPIC", "user_names"),
        "kafka_consumer_group": os.getenv("KAFKA_CONSUMER_GROUP", "following_scraper"),
        "kafka_address": os.getenv("KAFKA_ADDRESS", "localhost:9092"),
    }

    FollowingScraper(**scraper_settings).run()
62 |
--------------------------------------------------------------------------------
/twitter/scraper/twitterscraper/followers_scraper.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import twint
4 |
5 | from .scraper_manager import ScraperManager
6 | from .utils import get_conf, ShallowTwitterUser
7 |
8 |
def scrape(user_name: str) -> twint.user.user:
    """Build a ShallowTwitterUser for ``user_name`` and fill its ``followers_list``.

    The list is whatever ``scrape_follows_list`` collects when it runs
    ``twint.run.Followers`` with the config returned by ``get_conf(user_name)``.
    """
    shallow_user = ShallowTwitterUser(user_name)
    shallow_user.followers_list = scrape_follows_list(
        twint.run.Followers, get_conf(user_name)
    )
    return shallow_user
16 |
17 |
def scrape_follows_list(func, conf: twint.Config) -> list:
    """Call ``func(conf)`` and return everything it left in twint's output lists.

    Both ``twint.output.follows_list`` and ``twint.output.users_list`` are
    returned (in that order) and then reset to empty lists.
    """
    func(conf)

    # with conf.User_full = False the user names end up in follows_list,
    # with conf.User_full = True the user objects end up in users_list
    collected = [*twint.output.follows_list, *twint.output.users_list]

    twint.output.follows_list = []
    twint.output.users_list = []
    return collected
29 |
30 |
class FollowersScraper(ScraperManager):
    """ScraperManager whose scrape step collects the followers of a user."""

    name = "follower_scraper"

    @staticmethod
    def scrape(user_name: str):
        """Log the request and delegate to the module-level ``scrape``."""
        logging.info(f"scrape user {user_name}s followers")
        return scrape(user_name)
39 |
40 |
if __name__ == "__main__":
    import os

    logging.basicConfig(
        level=logging.INFO,
        datefmt="%H:%M:%S",
        format="%(asctime)s.%(msecs)03d - %(module)s - %(levelname)s - %(message)s",
    )

    # Kafka settings are read from the environment, falling back to local defaults
    scraper_settings = {
        "insert_topic": os.getenv("KAFKA_INSERT_TOPIC", "users_scraped"),
        "fetch_topic": os.getenv("KAFKA_FETCH_TOPIC", "user_names"),
        "kafka_consumer_group": os.getenv("KAFKA_CONSUMER_GROUP", "followers_scraper"),
        "kafka_address": os.getenv("KAFKA_ADDRESS", "localhost:9092"),
    }

    FollowersScraper(**scraper_settings).run()
63 |
--------------------------------------------------------------------------------
/frontend/src/components/FaceHitAnimation.js:
--------------------------------------------------------------------------------
1 | import React, { useState, useEffect } from "react";
2 | import "./FaceHitAnimation.css";
3 |
4 | function FaceHitAnimation(props) {
5 | // hack to freeze faceHits on first mount
6 | const [faceHits, setFaceHits] = useState({});
7 | useEffect(() => {
8 | setFaceHits(props.faceHits);
9 | }, []);
10 |
11 | const images = Object.entries(faceHits)
12 | .map(([postId, faces]) => ({
13 | postId,
14 | weight: faces.length,
15 | imageSrc: faces[0].fullImageSrc
16 | }))
17 | .sort((a, b) => b.weigth - a.weight)
18 | .reverse();
19 |
20 | const [imageCoordinates, setImageCoordinates] = useState([]);
21 |
22 | useEffect(() => {
23 | if (imageCoordinates.length >= images.length) {
24 | const timeoutId = setTimeout(props.onAnimationFinished, 2000);
25 | return () => clearTimeout(timeoutId);
26 | }
27 |
28 | const intervalId = setInterval(() => {
29 | setImageCoordinates(prevCoordinates => [
30 | ...prevCoordinates,
31 | { top: randomCoordinate(), left: randomCoordinate() }
32 | ]);
33 | }, 250);
34 | return () => clearInterval(intervalId);
35 | }, [imageCoordinates.length, images.length]);
36 |
37 | return (
38 | <>
39 | {imageCoordinates.map((coord, index) => (
40 |
47 | ))}
48 | >
49 | );
50 | }
51 |
52 | export default FaceHitAnimation;
53 |
54 | function BackgroundImage({ imageSrc, top, left, last }) {
55 | if (last) {
56 | return
;
57 | }
58 |
59 | return (
60 |
65 | );
66 | }
67 |
// Returns a random whole number from 0 to 100 (both inclusive).
function randomCoordinate() {
  const scaled = Math.random() * 100;
  return Math.round(scaled);
}
71 |
--------------------------------------------------------------------------------
/worker/worker.go:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | import (
4 | "fmt"
5 | "log"
6 | "sync"
7 | "time"
8 |
9 | "github.com/codeuniversity/smag-mvp/service"
10 | )
11 |
12 | // Worker abstracts away all the executor lifecycle hooks, exposing a much more high level api
13 | type Worker struct {
14 | executor *service.Executor
15 | name string
16 | step func() error
17 | shutdownHooks []shutdownHook
18 |
19 | stopTimeout time.Duration
20 |
21 | shutdownOnce sync.Once
22 | }
23 |
24 | // Start tells the worker to go and do work in another goroutine.
25 | // Returns a wait func that blocks until the worker is closed or encountered an error it can't handle
26 | func (w *Worker) Start() (wait func()) {
27 | go w.work()
28 |
29 | return w.executor.WaitUntilClosed
30 | }
31 |
32 | // Close Worker work
33 | func (w *Worker) Close() {
34 | w.shutdownOnce.Do(w.shutdown)
35 | }
36 |
37 | func (w *Worker) shutdown() {
38 | log.Println("stopping", w.name)
39 | w.executor.Stop()
40 | log.Println("waiting for work to stop")
41 | w.executor.WaitUntilStopped(w.stopTimeout)
42 |
43 | log.Println("calling shutdown hooks")
44 | for _, hook := range w.shutdownHooks {
45 | log.Println("shutting down", hook.name)
46 | err := w.callHookWithRecover(hook.f)
47 | if err != nil {
48 | log.Println("encountered error on shutdown: ", err)
49 | }
50 | }
51 |
52 | log.Println(w.name, "is shut down")
53 | w.executor.MarkAsClosed()
54 | }
55 |
56 | func (w *Worker) work() {
57 | defer w.Close()
58 |
59 | defer func() {
60 | log.Println(w.name, "is done here")
61 | w.executor.MarkAsStopped()
62 | }()
63 |
64 | log.Println("starting", w.name)
65 |
66 | for w.executor.IsRunning() {
67 | err := w.callHookWithRecover(w.step)
68 | if err != nil {
69 | log.Println("encountered error while working: ", err)
70 | return
71 | }
72 | }
73 | }
74 |
75 | func (w *Worker) callHookWithRecover(hook func() error) (err error) {
76 | defer func() {
77 | if r := recover(); r != nil {
78 | err = fmt.Errorf("recovered from panic: %s", r)
79 | }
80 | }()
81 |
82 | err = hook()
83 | return
84 | }
85 |
--------------------------------------------------------------------------------
/insta/filter/post_pictures/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 |
6 | "github.com/codeuniversity/smag-mvp/insta/models"
7 | "github.com/codeuniversity/smag-mvp/kafka/changestream"
8 | "github.com/codeuniversity/smag-mvp/service"
9 | "github.com/codeuniversity/smag-mvp/utils"
10 |
11 | "github.com/segmentio/kafka-go"
12 | )
13 |
14 | func main() {
15 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "my-kafka:9092")
16 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID")
17 | changesTopic := utils.GetStringFromEnvWithDefault("KAFKA_CHANGE_TOPIC", "postgres.public.posts")
18 | downloadTopic := utils.GetStringFromEnvWithDefault("KAFKA_PICTURE_DOWNLOADS_TOPIC", "insta_post_picture_download_jobs")
19 |
20 | f := changestream.NewFilter(kafkaAddress, groupID, changesTopic, downloadTopic, filterChange)
21 |
22 | service.CloseOnSignal(f)
23 | waitUntilClosed := f.Start()
24 |
25 | waitUntilClosed()
26 | }
27 |
28 | type post struct {
29 | ID int `json:"id"`
30 | PictureURL string `json:"picture_url"`
31 | }
32 |
33 | func filterChange(m *changestream.ChangeMessage) ([]kafka.Message, error) {
34 | if !(m.Payload.Op == "c" || m.Payload.Op == "u") {
35 | return nil, nil
36 | }
37 |
38 | currentVersion := &post{}
39 | err := json.Unmarshal(m.Payload.After, currentVersion)
40 | if err != nil {
41 | return nil, err
42 | }
43 |
44 | if currentVersion.PictureURL == "" {
45 | return nil, nil
46 | }
47 |
48 | if m.Payload.Op == "c" {
49 | return constructDownloadJobMessage(currentVersion)
50 | }
51 |
52 | previousVersion := &post{}
53 | err = json.Unmarshal(m.Payload.Before, previousVersion)
54 | if err != nil {
55 | return nil, err
56 | }
57 |
58 | if currentVersion.PictureURL != previousVersion.PictureURL {
59 | return constructDownloadJobMessage(currentVersion)
60 | }
61 |
62 | return nil, nil
63 | }
64 |
65 | func constructDownloadJobMessage(p *post) ([]kafka.Message, error) {
66 | job := &models.PostDownloadJob{
67 | PostID: p.ID,
68 | PictureURL: p.PictureURL,
69 | }
70 | b, err := json.Marshal(job)
71 | if err != nil {
72 | return nil, err
73 | }
74 |
75 | return []kafka.Message{
76 | {Value: b},
77 | }, nil
78 | }
79 |
--------------------------------------------------------------------------------
/worker/builder.go:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | import (
4 | "errors"
5 | "time"
6 |
7 | "github.com/codeuniversity/smag-mvp/service"
8 | )
9 |
10 | const (
11 | defaultStopTimeout = 5 * time.Second
12 | )
13 |
14 | // Builder is for configuring a worker
15 | type Builder struct {
16 | name string
17 | step func() error
18 | shutdownHooks []shutdownHook
19 | stopTimeout time.Duration
20 | }
21 |
22 | // WithName sets the worker name (required)
23 | func (b Builder) WithName(n string) Builder {
24 | b.name = n
25 | return b
26 | }
27 |
28 | // WithWorkStep sets the step func a worker should repeatedly call (required)
29 | func (b Builder) WithWorkStep(s func() error) Builder {
30 | b.step = s
31 | return b
32 | }
33 |
34 | // AddShutdownHook registers the hook to be called on shutdown of the worker
35 | func (b Builder) AddShutdownHook(hookName string, hook func() error) Builder {
36 | b.shutdownHooks = append(b.shutdownHooks, shutdownHook{
37 | f: hook,
38 | name: hookName,
39 | })
40 |
41 | return b
42 | }
43 |
44 | // WithStopTimeout changes the timeout used when stopping the worker loop. If not set, uses defaultStopTimeout
45 | func (b Builder) WithStopTimeout(t time.Duration) Builder {
46 | b.stopTimeout = t
47 | return b
48 | }
49 |
50 | // Build a Worker with the given configuration
51 | func (b Builder) Build() (*Worker, error) {
52 | if !b.valid() {
53 | return nil, errors.New("could not build worker: both name and work step have to be set")
54 | }
55 | w := &Worker{
56 | executor: service.New(),
57 | name: b.name,
58 | step: b.step,
59 | shutdownHooks: b.shutdownHooks,
60 | stopTimeout: b.stopTimeout,
61 | }
62 |
63 | if w.stopTimeout == 0 {
64 | w.stopTimeout = defaultStopTimeout
65 | }
66 |
67 | return w, nil
68 | }
69 |
70 | // MustBuild a Worker with the given configuration. Panics if not all required config is given
71 | func (b Builder) MustBuild() *Worker {
72 | w, err := b.Build()
73 |
74 | if err != nil {
75 | panic(err)
76 | }
77 |
78 | return w
79 | }
80 |
81 | func (b Builder) valid() bool {
82 | return b.name != "" && b.step != nil
83 | }
84 |
85 | type shutdownHook struct {
86 | f func() error
87 | name string
88 | }
89 |
--------------------------------------------------------------------------------
/frontend/public/background.js:
--------------------------------------------------------------------------------
// Sketch state: the loaded font, the letter size, all letter streams and the
// divisor applied to the per-letter opacity step.
var typeface,
  fontSize = 18,
  streams = [],
  fadeInterval = 1;

// Loads the font file before the sketch starts.
function preload() {
  var loadedFont = loadFont("Barlow-SemiBold.otf");
  typeface = loadedFont;
}
10 |
// Creates a window-sized canvas and one stream per fontSize-wide column,
// each starting at a random height.
function setup() {
  createCanvas(windowWidth, windowHeight);
  textSize(fontSize);

  var lastColumn = width / fontSize;
  for (var column = 0; column <= lastColumn; column++) {
    var stream = new Stream();
    stream.generateLetters(
      column * fontSize,
      random(-windowHeight, windowHeight)
    );
    streams.push(stream);
  }
}
23 |
// Paints the translucent background and renders every stream.
function draw() {
  background(0, 200);
  noStroke();

  for (var i = 0; i < streams.length; i++) {
    streams[i].render();
  }
}
32 |
// A single falling character of a stream.
// `first` marks the leading letter of a stream, `opacity` is its alpha value.
function Letter(x, y, speed, first, opacity) {
  this.x = x;
  this.y = y;
  this.speed = speed;
  this.first = first;
  this.opacity = opacity;
  // the character is re-rolled on every frame that is a multiple of `switch`
  this.switch = round(random(10, 80));

  // Picks a new character with a char code between 48 and 90 when it is this
  // letter's turn to switch.
  this.RandomLetter = function() {
    if (frameCount % this.switch !== 0) {
      return;
    }
    this.value = String.fromCharCode(round(random(48, 90)));
  };

  // Moves the letter down by its speed, wrapping to the top once it has
  // reached the canvas height.
  this.rain = function() {
    if (this.y >= height) {
      this.y = 0;
    } else {
      this.y += this.speed;
    }
  };
}
52 |
// A vertical column of letters that fall together at the same speed.
function Stream() {
  this.letters = [];
  this.totalLetters = round(random(15, 30));
  this.speed = random(2, 5);

  // Fills the stream with letters stacked upwards from (x, y), each one a
  // little more transparent than the one below it. Only the bottom letter can
  // be flagged as `first`.
  this.generateLetters = function(x, y) {
    var opacity = 255;
    var first = round(random(0, 4)) === 1;
    for (var i = 0; i <= this.totalLetters; i++) {
      // declared locally: assigning to an undeclared `letter` creates an
      // implicit global (and throws in strict mode)
      var letter = new Letter(x, y, this.speed, first, opacity);
      letter.RandomLetter();
      this.letters.push(letter);
      opacity -= 255 / this.totalLetters / fadeInterval;
      y -= fontSize;
      first = false;
    }
  };

  // Draws every letter, then advances it and lets it switch its character.
  this.render = function() {
    this.letters.forEach(function(letter) {
      if (letter.first) {
        // the leading letter is drawn in a lighter color
        fill(150, 220, 255, letter.opacity);
      } else {
        fill(42, 159, 216, letter.opacity);
      }
      text(letter.value, letter.x, letter.y);
      letter.rain();
      letter.RandomLetter();
    });
  };
}
84 |
--------------------------------------------------------------------------------
/insta/filter/post_face-recon/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 |
6 | "github.com/codeuniversity/smag-mvp/insta/models"
7 | "github.com/codeuniversity/smag-mvp/kafka/changestream"
8 | "github.com/codeuniversity/smag-mvp/service"
9 | "github.com/codeuniversity/smag-mvp/utils"
10 |
11 | "github.com/segmentio/kafka-go"
12 | )
13 |
14 | func main() {
15 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "127.0.0.1:9092")
16 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID")
17 | changesTopic := utils.GetStringFromEnvWithDefault("KAFKA_CHANGE_TOPIC", "postgres.public.posts")
18 | downloadTopic := utils.GetStringFromEnvWithDefault("KAFKA_PICTURE_FACE_RECON_TOPIC", "insta_post_face_recon_jobs")
19 |
20 | f := changestream.NewFilter(kafkaAddress, groupID, changesTopic, downloadTopic, filterChange)
21 |
22 | service.CloseOnSignal(f)
23 | waitUntilClosed := f.Start()
24 |
25 | waitUntilClosed()
26 | }
27 |
28 | type post struct {
29 | ID int `json:"id"`
30 | InternalPictureURL string `json:"internal_picture_url"`
31 | }
32 |
33 | func filterChange(m *changestream.ChangeMessage) ([]kafka.Message, error) {
34 | if !(m.Payload.Op == "c" || m.Payload.Op == "u") {
35 | return nil, nil
36 | }
37 |
38 | currentVersion := &post{}
39 | err := json.Unmarshal(m.Payload.After, currentVersion)
40 | if err != nil {
41 | return nil, err
42 | }
43 |
44 | if m.Payload.Op == "c" {
45 | return constructDownloadJobMessage(currentVersion)
46 | }
47 |
48 | previousVersion := &post{}
49 | err = json.Unmarshal(m.Payload.Before, previousVersion)
50 | if err != nil {
51 | return nil, err
52 | }
53 |
54 | if currentVersion.InternalPictureURL != previousVersion.InternalPictureURL {
55 | return constructDownloadJobMessage(currentVersion)
56 | }
57 |
58 | return nil, nil
59 | }
60 |
61 | func constructDownloadJobMessage(p *post) ([]kafka.Message, error) {
62 | if p.InternalPictureURL == "" {
63 | return nil, nil
64 | }
65 |
66 | job := &models.PostDownloadJob{
67 | PostID: p.ID,
68 | PictureURL: p.InternalPictureURL,
69 | }
70 | b, err := json.Marshal(job)
71 | if err != nil {
72 | return nil, err
73 | }
74 |
75 | return []kafka.Message{
76 | {Value: b},
77 | }, nil
78 | }
79 |
--------------------------------------------------------------------------------
/insta/filter/post_pictures/filter_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/codeuniversity/smag-mvp/kafka/changestream"
7 | "github.com/stretchr/testify/assert"
8 | )
9 |
10 | func TestFilter(t *testing.T) {
11 | validPayloadJSON := []byte("{\"id\":1,\"picture_url\":\"https://test.test\"}")
12 | preUpdatePayloadJSON := []byte("{\"id\":1,\"picture_url\":\"https://test2.test2\"}")
13 | invalidPayloadJSON := []byte("{\"id\":\"1\",\"picture_url\":\"https://test.test\"}")
14 |
15 | t.Run("create event with unmarshable json", func(t *testing.T) {
16 | //create test input
17 | changeMsg := &changestream.ChangeMessage{}
18 | changeMsg.Payload.Op = "c"
19 | changeMsg.Payload.After = validPayloadJSON
20 |
21 | kMessages, err := filterChange(changeMsg)
22 |
23 | expected := "{\"post_id\":1,\"picture_url\":\"https://test.test\"}"
24 |
25 | assert.Nil(t, err, "no error")
26 | assert.Equal(t, 1, len(kMessages))
27 | assert.Equal(t, expected, string(kMessages[0].Value))
28 | })
29 |
30 | t.Run("update event with unmarshable json", func(t *testing.T) {
31 | //create test input
32 | changeMsg := &changestream.ChangeMessage{}
33 | changeMsg.Payload.Op = "u"
34 | changeMsg.Payload.Before = preUpdatePayloadJSON
35 | changeMsg.Payload.After = validPayloadJSON
36 |
37 | kMessages, err := filterChange(changeMsg)
38 |
39 | expected := "{\"post_id\":1,\"picture_url\":\"https://test.test\"}"
40 |
41 | assert.Nil(t, err, "no error")
42 | assert.Equal(t, 1, len(kMessages))
43 | assert.Equal(t, expected, string(kMessages[0].Value))
44 | })
45 |
46 | t.Run("create event with not unmarshable json", func(t *testing.T) {
47 | //create test input
48 | changeMsg := &changestream.ChangeMessage{}
49 | changeMsg.Payload.Op = "c"
50 | changeMsg.Payload.After = invalidPayloadJSON
51 |
52 | kMessages, err := filterChange(changeMsg)
53 |
54 | assert.NotNil(t, err, "error occurs")
55 | assert.Nil(t, kMessages, "nil output")
56 | })
57 |
58 | t.Run("ignored event", func(t *testing.T) {
59 | //create test input
60 | changeMsg := &changestream.ChangeMessage{}
61 | changeMsg.Payload.Op = "d"
62 |
63 | kMessages, err := filterChange(changeMsg)
64 |
65 | assert.Nil(t, err, "no error")
66 | assert.Nil(t, kMessages, "nil output")
67 | })
68 | }
69 |
--------------------------------------------------------------------------------
/insta/indexer/faces/insta_faces_indexer.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 | "strconv"
6 |
7 | "github.com/codeuniversity/smag-mvp/elastic"
8 | "github.com/codeuniversity/smag-mvp/elastic/indexer"
9 | esModels "github.com/codeuniversity/smag-mvp/elastic/models"
10 | "github.com/codeuniversity/smag-mvp/insta/models"
11 | "github.com/codeuniversity/smag-mvp/kafka/changestream"
12 | "github.com/codeuniversity/smag-mvp/service"
13 | "github.com/codeuniversity/smag-mvp/utils"
14 | )
15 |
16 | func main() {
17 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "my-kafka:9092")
18 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID")
19 | bulkChunkSize := utils.GetNumberFromEnvWithDefault("BULK_CHUNK_SIZE", 10)
20 | changesTopic := utils.GetStringFromEnvWithDefault("KAFKA_CHANGE_TOPIC", "postgres.public.face_data")
21 | bulkFetchTimeoutSeconds := utils.GetNumberFromEnvWithDefault("BULK_FETCH_TIMEOUT_SECONDS", 5)
22 | esHosts := utils.GetMultipleStringsFromEnvWithDefault("ES_HOSTS", []string{"http://localhost:9200"})
23 |
24 | i := indexer.New(esHosts, elastic.FacesIndex, elastic.FacesIndexMapping, kafkaAddress, changesTopic, groupID, indexFace, bulkChunkSize, bulkFetchTimeoutSeconds)
25 |
26 | service.CloseOnSignal(i)
27 | waitUntilDone := i.Start()
28 |
29 | waitUntilDone()
30 | }
31 |
32 | func indexFace(m *changestream.ChangeMessage) (*indexer.BulkIndexDoc, error) {
33 |
34 | switch m.Payload.Op {
35 | case "r", "u", "c":
36 | break
37 | default:
38 | return nil, nil
39 | }
40 |
41 | face := &models.FaceData{}
42 | err := json.Unmarshal(m.Payload.After, face)
43 | if err != nil {
44 | return nil, err
45 | }
46 |
47 | return createBulkIndexOperation(face)
48 | }
49 |
50 | func createBulkIndexOperation(face *models.FaceData) (*indexer.BulkIndexDoc, error) {
51 | bulkOperation := `{ "index": {} }` + "\n"
52 |
53 | doc, err := esModels.FaceDocFromFaceData(face)
54 | if err != nil {
55 | return nil, err
56 | }
57 |
58 | docJson, err := json.Marshal(doc)
59 |
60 | if err != nil {
61 | return nil, err
62 | }
63 |
64 | docJson = append(docJson, "\n"...)
65 |
66 | bulkUpsertBody := bulkOperation + string(docJson)
67 |
68 | return &indexer.BulkIndexDoc{DocumentId: strconv.Itoa(int(face.ID)), BulkOperation: bulkUpsertBody}, err
69 |
70 | }
71 |
--------------------------------------------------------------------------------
/frontend/src/components/camera-feed.js:
--------------------------------------------------------------------------------
1 | import React, { Component } from "react";
2 |
3 | export class CameraFeed extends Component {
4 | /**
5 | * Processes available devices and identifies one by the label
6 | * @memberof CameraFeed
7 | * @instance
8 | */
9 | processDevices(devices) {
10 | devices.forEach(device => {
11 | this.setDevice(device);
12 | });
13 | }
14 |
15 | /**
16 | * Sets the active device and starts playing the feed
17 | * @memberof CameraFeed
18 | * @instance
19 | */
20 | async setDevice(device) {
21 | const { deviceId } = device;
22 | const stream = await navigator.mediaDevices.getUserMedia({
23 | audio: false,
24 | video: { deviceId }
25 | });
26 | this.videoPlayer.srcObject = stream;
27 | this.videoPlayer.play();
28 | }
29 |
30 | /**
31 | * On mount, grab the users connected devices and process them
32 | * @memberof CameraFeed
33 | * @instance
34 | * @override
35 | */
36 | async componentDidMount() {
37 | const cameras = await navigator.mediaDevices.enumerateDevices();
38 | this.processDevices(cameras);
39 |
40 | this.intervalId = setInterval(() => {
41 | this.takePhoto();
42 | }, 2000);
43 | }
44 |
45 | componentWillUnmount() {
46 | clearInterval(this.intervalId);
47 | this.videoPlayer.pause();
48 | }
49 |
/**
 * Draws the current video frame onto the canvas (1280x720) and passes
 * the resulting blob to the onFileSubmit prop
 * @memberof CameraFeed
 * @instance
 */
takePhoto = () => {
  const { onFileSubmit } = this.props;
  const canvas = this.canvas;

  canvas.getContext("2d").drawImage(this.videoPlayer, 0, 0, 1280, 720);
  canvas.toBlob(onFileSubmit);
};
61 |
// Cancels the interval that periodically takes photos.
stopTakingPictures = () => {
  const { intervalId } = this;
  clearInterval(intervalId);
};
65 |
66 | render() {
67 | return (
68 |
69 |
70 |
77 |
78 |
80 |
81 | );
82 | }
83 | }
84 |
--------------------------------------------------------------------------------
/kafka/changestream/filter.go:
--------------------------------------------------------------------------------
1 | package changestream
2 |
3 | import (
4 | "context"
5 | "encoding/json"
6 | "fmt"
7 | "time"
8 |
9 | kf "github.com/codeuniversity/smag-mvp/kafka"
10 | "github.com/codeuniversity/smag-mvp/worker"
11 |
12 | "github.com/segmentio/kafka-go"
13 | )
14 |
// Filter is responsible for reading the change stream,
// filtering out the events that are not interesting to us
// and writing new messages based on the changes to the filtered topic
type Filter struct {
	*worker.Worker

	// changesReader is the reader runStep fetches from and commits to.
	changesReader *kafka.Reader
	// filteredWriter receives the messages returned by filterFunc.
	filteredWriter *kafka.Writer

	// filterFunc turns one decoded ChangeMessage into the messages to publish.
	filterFunc FilterFunc
}

// FilterFunc given a ChangeMessage from the changesTopic
// returns zero, one or multiple kafka Messages that should be written to the filteredTopic.
// A non-nil error aborts the current work step before anything is written or committed.
type FilterFunc func(*ChangeMessage) ([]kafka.Message, error)
30 |
31 | // NewFilter returns an initilized Filter
32 | func NewFilter(kafkaAddress, kafkaGroupID, changesTopic, filteredTopic string, filter FilterFunc) *Filter {
33 | readerConfig := kf.NewReaderConfig(kafkaAddress, kafkaGroupID, changesTopic)
34 | writerConfig := kf.NewWriterConfig(kafkaAddress, filteredTopic, true)
35 |
36 | f := &Filter{
37 | changesReader: kf.NewReader(readerConfig),
38 | filteredWriter: kf.NewWriter(writerConfig),
39 | filterFunc: filter,
40 | }
41 |
42 | b := worker.Builder{}.WithName(fmt.Sprintf("changestream_filter[%s->%s]", changesTopic, filteredTopic)).
43 | WithWorkStep(f.runStep).
44 | WithStopTimeout(10*time.Second).
45 | AddShutdownHook("changesReader", f.changesReader.Close).
46 | AddShutdownHook("filteredWriter", f.filteredWriter.Close)
47 |
48 | f.Worker = b.MustBuild()
49 |
50 | return f
51 | }
52 |
53 | func (t *Filter) runStep() error {
54 | m, err := t.changesReader.FetchMessage(context.Background())
55 | if err != nil {
56 | return err
57 | }
58 |
59 | changeMessage := &ChangeMessage{}
60 | err = json.Unmarshal(m.Value, changeMessage)
61 | if err != nil {
62 | return err
63 | }
64 |
65 | kafkaMessages, err := t.filterFunc(changeMessage)
66 | if err != nil {
67 | return err
68 | }
69 |
70 | if len(kafkaMessages) > 0 {
71 | err = t.filteredWriter.WriteMessages(context.Background(), kafkaMessages...)
72 | if err != nil {
73 | return err
74 | }
75 | }
76 |
77 | return t.changesReader.CommitMessages(context.Background(), m)
78 |
79 | }
80 |
--------------------------------------------------------------------------------
/api/README.md:
--------------------------------------------------------------------------------
1 | # SMAG gRPC Web API
2 |
3 | ## About
4 |
5 | In our project we are using a [gRPC Web](https://grpc.io/docs/) API. For that we are using an [envoy proxy](https://www.envoyproxy.io/docs/envoy/latest/) to be able to connect to the gRPC Server. As our system is not publicly accessible an AWS Account in our Organisation with the appropriate access is required.
6 |
7 | ## Requirements
8 |
9 | In order to successfully use our api make sure to have:
10 |
11 | - a running [kubernetes setup](https://github.com/codeuniversity/smag-deploy/blob/master/README.md) (permission required)
12 | - _optional for local testing_: [protoc](http://google.github.io/proto-lens/installing-protoc.html) to generate the protofiles for the frontend
13 |
14 | ## Usage
15 |
16 | To use the production environment, follow these steps:
17 |
18 | 1. Get name of envoy proxy `kubectl get pods | grep envoy`
19 | 2. Forward the envoy-pod port with `kubectl port-forward envoy-proxy-deployment-6b89675d5b-d86c4 4000:8080`
20 | 3. To make use of the API in the React Frontend import and run the following:
21 | ```javascript
22 | import {
23 | User,
24 | UserNameRequest,
25 | UserIdRequest,
26 | InstaPostsResponse,
27 | UserSearchResponse
28 | } from "./protofiles/client/usersearch_pb.js";
29 | import { UserSearchServiceClient } from "./protofiles/client/usersearch_grpc_web_pb.js";
30 | var userSearch = new UserSearchServiceClient("http://localhost:4000");
31 | var request = new UserNameRequest();
32 | request.setUserName("codeuniversity");
33 | userSearch.getUserWithUsername(request, {}, function(err, response) {
34 | //example function call...
35 | });
36 | ```
37 |
38 | ## Functions
39 |
40 | To check the attributes of the proto messages, take a look at the proto file [usersearch.proto](https://github.com/codeuniversity/smag-mvp/blob/master/api/proto/usersearch.proto).
41 |
42 | | **Method** | **Function Name** | **Input Message** | **Return Message** |
43 | | ---------- | ------------------------ | ----------------- | ------------------ |
44 | | GET | getUserWithUsername | UserNameRequest | User |
45 | | GET | getAllUsersLikeUsername | UserNameRequest | UserSearchResponse |
46 | | GET | getTaggedPostsWithUserId | UserIdRequest | InstaPostsResponse |
47 | | GET | getInstaPostsWithUserId | UserIdRequest | InstaPostsResponse |
48 |
--------------------------------------------------------------------------------
/service/executor.go:
--------------------------------------------------------------------------------
1 | package service
2 |
3 | import (
4 | "log"
5 | "os"
6 | "os/signal"
7 | "syscall"
8 | "time"
9 | )
10 |
// Service is a closeable service that usually includes an Executor.
type Service interface {
	Close()
}

// CloseOnSignal calls s.Close() once the process receives SIGINT or SIGTERM.
// It returns immediately; the waiting happens in a background goroutine.
func CloseOnSignal(s Service) {
	// Register the handler before returning. If this happened inside the
	// goroutine, a signal delivered right after CloseOnSignal returned could
	// still hit the default handler and terminate the process without Close.
	signals := make(chan os.Signal, 1)
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)

	go func() {
		sig := <-signals
		log.Println("Received Signal:", sig)

		s.Close()
	}()
}
27 |
// Executor handles gracefully closing execution for services.
// Closing an executor goes through the states running, stopping, stopped and closed.
type Executor struct {
	stopChan    chan struct{} // holds a value once Stop was called; never drained
	stoppedChan chan struct{} // filled by MarkAsStopped, drained by WaitUntilStopped
	closedChan  chan struct{} // filled by MarkAsClosed, drained by WaitUntilClosed
}

// New returns an Executor ready for use.
func New() *Executor {
	return &Executor{
		stopChan:    make(chan struct{}, 1),
		stoppedChan: make(chan struct{}, 1),
		closedChan:  make(chan struct{}, 1),
	}
}

// IsRunning should be used to determine if the execution should be halted.
// It reports true until Stop() is called and false afterwards.
func (e *Executor) IsRunning() bool {
	return len(e.stopChan) == 0
}

// Stop is to be called when the executor should stop.
// Calling it more than once is safe: stopChan is never drained, so a second
// blocking send would hang forever; repeated calls are therefore no-ops.
func (e *Executor) Stop() {
	select {
	case e.stopChan <- struct{}{}:
	default:
		// already stopping
	}
}

// MarkAsStopped is to be called when the execution was stopped.
// All used resources can be gracefully closed and disposed of now.
func (e *Executor) MarkAsStopped() {
	e.stoppedChan <- struct{}{}
}

// WaitUntilStopped blocks until either MarkAsStopped() was called in any goroutine
// or the timeout has passed.
func (e *Executor) WaitUntilStopped(timeout time.Duration) {
	t := time.NewTimer(timeout)
	defer t.Stop()

	select {
	case <-t.C:
	case <-e.stoppedChan:
	}
}

// MarkAsClosed is to be called when the service using the executor is closed.
// The process can be stopped or killed now.
func (e *Executor) MarkAsClosed() {
	e.closedChan <- struct{}{}
}

// WaitUntilClosed blocks until MarkAsClosed() was called.
func (e *Executor) WaitUntilClosed() {
	<-e.closedChan
}
86 |
--------------------------------------------------------------------------------
/api/proto/usersearch.proto:
--------------------------------------------------------------------------------
syntax = "proto3";

package proto;

// User is a single user profile, optionally carrying the users it follows
// and the users following it.
message User {
  string id = 1;
  string user_name = 2;
  string real_name = 3;
  string bio = 4;
  string avatar_url = 5;
  repeated User followings = 6;
  repeated User followers = 7;
}

// UserSearchResponse is the result list of GetAllUsersLikeUsername.
message UserSearchResponse {
  repeated User user_list = 1;
}

// UserNameRequest identifies a user by user name.
message UserNameRequest {
  string user_name = 1;
}

// UserIdRequest identifies a user by id.
message UserIdRequest {
  string user_id = 1;
}

// InstaPost is a single post together with the users tagged in it.
message InstaPost {
  string id = 1;
  string post_id = 2;
  string img_url = 3;
  string caption = 4;
  repeated User tagged_users = 5;
  string short_code = 6;

}

// InstaPostsResponse is the result of GetInstaPostsWithUserId.
message InstaPostsResponse {
  string user_id = 1;
  repeated InstaPost insta_posts = 2;
}

// FaceSearchRequest carries the picture to search similar faces for.
message FaceSearchRequest {
  string base64EncodedPicture = 1;
}

// Face is one face found in an image.
// NOTE(review): x/y/width/height presumably describe the face's bounding box
// inside full_image_src - confirm against the face recognition service.
// NOTE(review): post_id is int32 here but string in InstaPost - confirm intended.
message Face {
  int32 x = 1;
  int32 y = 2;
  int32 width = 3;
  int32 height = 4;
  int32 post_id = 5;
  string full_image_src = 6;
}

// FaceSearchResponse is the result of SearchSimilarFaces.
message FaceSearchResponse {
  repeated Face faces = 1;
}

// WeightedPostWithFaces is a post id with a weight and the faces belonging to it.
message WeightedPostWithFaces {
  int32 post_id = 1;
  int32 weight = 2;
  repeated Face faces = 3;
}

// WeightedPosts is the input of SearchUsersWithWeightedPosts.
message WeightedPosts{
  repeated WeightedPostWithFaces posts = 1;
}

// UserWithFaces is a user with a weight and the faces associated with them.
message UserWithFaces {
  User user = 1;
  repeated Face faces = 2;
  int32 weight = 3;
}

// WeightedUsers is the result of SearchUsersWithWeightedPosts.
message WeightedUsers {
  repeated UserWithFaces users_with_faces = 1;
}

// UserDataPointCount is the result of DataPointCountForUserId.
message UserDataPointCount {
  int32 count = 1;
}

// FoundCities is the result of FindCitiesForUserId.
message FoundCities {
  repeated string city_names = 1;
}

// UserSearchService is the gRPC API for user, post, face and city lookups.
service UserSearchService {
  rpc GetAllUsersLikeUsername(UserNameRequest) returns (UserSearchResponse);
  rpc GetUserWithUserId(UserIdRequest) returns (User);
  rpc GetUserWithUsername(UserNameRequest) returns (User);
  rpc GetInstaPostsWithUserId(UserIdRequest) returns (InstaPostsResponse);
  rpc SearchSimilarFaces(FaceSearchRequest) returns(FaceSearchResponse);
  rpc SearchUsersWithWeightedPosts(WeightedPosts) returns (WeightedUsers);
  rpc DataPointCountForUserId(UserIdRequest) returns (UserDataPointCount);
  rpc FindCitiesForUserId(UserIdRequest) returns (FoundCities);
}
97 |
--------------------------------------------------------------------------------
/twitter/inserter/posts/inserter.go:
--------------------------------------------------------------------------------
1 | package inserter
2 |
3 | import (
4 | "context"
5 | "encoding/json"
6 | "fmt"
7 | "log"
8 | "time"
9 |
10 | "github.com/jinzhu/gorm"
11 | // necessary for gorm :pointup:
12 | _ "github.com/jinzhu/gorm/dialects/postgres"
13 |
14 | dbUtils "github.com/codeuniversity/smag-mvp/db"
15 | "github.com/codeuniversity/smag-mvp/twitter/models"
16 | "github.com/codeuniversity/smag-mvp/utils"
17 | "github.com/codeuniversity/smag-mvp/worker"
18 |
19 | "github.com/segmentio/kafka-go"
20 | )
21 |
// Inserter represents the inserter containing all clients it uses
type Inserter struct {
	*worker.Worker

	// qReader is the kafka reader raw twitter posts are fetched from.
	qReader *kafka.Reader

	// db is the gorm handle the converted posts are written to.
	db *gorm.DB
}
30 |
31 | // New returns an initilized inserter
32 | func New(postgresHost, postgresPassword, dbName string, qReader *kafka.Reader) *Inserter {
33 | i := &Inserter{}
34 | i.qReader = qReader
35 |
36 | connectionString := fmt.Sprintf("host=%s user=postgres dbname=%s sslmode=disable", postgresHost, dbName)
37 | if postgresPassword != "" {
38 | connectionString += " " + "password=" + postgresPassword
39 | }
40 |
41 | db, err := gorm.Open("postgres", connectionString)
42 | utils.PanicIfNotNil(err)
43 | i.db = db // use db.Debug() here to get detailed gorm logs
44 |
45 | db.AutoMigrate(&models.TwitterPost{})
46 |
47 | b := worker.Builder{}.WithName("twitter_inserter_posts").
48 | WithWorkStep(i.runStep).
49 | WithStopTimeout(10*time.Second).
50 | AddShutdownHook("qReader", qReader.Close).
51 | AddShutdownHook("postgres_client", db.Close)
52 |
53 | i.Worker = b.MustBuild()
54 |
55 | return i
56 | }
57 |
58 | // Run the inserter
59 | func (i *Inserter) runStep() error {
60 | m, err := i.qReader.FetchMessage(context.Background())
61 | if err != nil {
62 | return err
63 | }
64 |
65 | rawPost := &models.TwitterPostRaw{}
66 |
67 | err = json.Unmarshal(m.Value, rawPost)
68 | if err != nil {
69 | return err
70 | }
71 |
72 | post := models.ConvertTwitterPost(rawPost)
73 | log.Println("inserting post:", post.Link)
74 |
75 | err = i.insertPost(post)
76 | if err != nil {
77 | return err
78 | }
79 | return i.qReader.CommitMessages(context.Background(), m)
80 | }
81 |
82 | func (i *Inserter) insertPost(post *models.TwitterPost) error {
83 | fromPost := &models.TwitterPost{}
84 | filter := &models.TwitterPost{PostIdentifier: post.PostIdentifier}
85 |
86 | err := dbUtils.CreateOrUpdate(i.db, fromPost, filter, post)
87 | if err != nil {
88 | return err
89 | }
90 |
91 | return nil
92 | }
93 |
--------------------------------------------------------------------------------
/twitter/inserter/users/inserter.go:
--------------------------------------------------------------------------------
1 | package inserter
2 |
3 | import (
4 | "context"
5 | "encoding/json"
6 | "fmt"
7 | "log"
8 | "time"
9 |
10 | "github.com/jinzhu/gorm"
11 | // necessary for gorm :pointup:
12 | _ "github.com/jinzhu/gorm/dialects/postgres"
13 |
14 | dbUtils "github.com/codeuniversity/smag-mvp/db"
15 | "github.com/codeuniversity/smag-mvp/twitter/models"
16 | "github.com/codeuniversity/smag-mvp/utils"
17 | "github.com/codeuniversity/smag-mvp/worker"
18 |
19 | "github.com/segmentio/kafka-go"
20 | )
21 |
// Inserter represents the inserter containing all clients it uses
type Inserter struct {
	*worker.Worker

	// qReader is the kafka reader raw twitter users are fetched from.
	qReader *kafka.Reader

	// db is the gorm handle the converted users are written to.
	db *gorm.DB
}
30 |
31 | // New returns an initilized inserter
32 | func New(postgresHost, postgresPassword, dbName string, qReader *kafka.Reader) *Inserter {
33 | i := &Inserter{}
34 | i.qReader = qReader
35 |
36 | connectionString := fmt.Sprintf("host=%s user=postgres dbname=%s sslmode=disable", postgresHost, dbName)
37 | if postgresPassword != "" {
38 | connectionString += " " + "password=" + postgresPassword
39 | }
40 |
41 | db, err := gorm.Open("postgres", connectionString)
42 | utils.PanicIfNotNil(err)
43 | i.db = db // use db.Debug() here to get detailed gorm logs
44 |
45 | db.AutoMigrate(&models.TwitterUser{})
46 |
47 | b := worker.Builder{}.WithName("twitter_inserter_users").
48 | WithWorkStep(i.runStep).
49 | WithStopTimeout(10*time.Second).
50 | AddShutdownHook("qReader", qReader.Close).
51 | AddShutdownHook("postgres_client", db.Close)
52 |
53 | i.Worker = b.MustBuild()
54 |
55 | return i
56 | }
57 |
58 | func (i *Inserter) runStep() error {
59 | m, err := i.qReader.FetchMessage(context.Background())
60 | if err != nil {
61 | return err
62 | }
63 |
64 | rawUser := &models.TwitterUserRaw{}
65 |
66 | err = json.Unmarshal(m.Value, rawUser)
67 | if err != nil {
68 | return err
69 | }
70 |
71 | user := models.ConvertTwitterUser(rawUser)
72 | log.Println("inserting user: ", user.Username)
73 |
74 | err = i.insertUser(user)
75 | if err != nil {
76 | return err
77 | }
78 |
79 | return i.qReader.CommitMessages(context.Background(), m)
80 | }
81 |
82 | func (i *Inserter) insertUser(user *models.TwitterUser) error {
83 | var err error
84 |
85 | baseUser := &models.TwitterUser{}
86 | filter := &models.TwitterUser{Username: user.Username}
87 |
88 | err = dbUtils.CreateOrUpdate(i.db, baseUser, filter, user)
89 | if err != nil {
90 | return err
91 | }
92 |
93 | return nil
94 | }
95 |
--------------------------------------------------------------------------------
/frontend/README.md:
--------------------------------------------------------------------------------
1 | ## Social Record front-end
2 |
3 | The Social Record front-end displays the analyzed Instagram data of the exhibition's visitors. Using the front-end, visitors can experience what public data social media providers expose about them.
4 |
5 | ### Prerequisites
6 |
7 | | | | | | |
8 | | -------------- | ------------------------------------------------------------------ | --- | --- | --- |
9 | | Node.js | `v10.16.0` | | | |
10 | | Go | `go 1.13` ([go modules](https://blog.golang.org/using-go-modules)) | | | |
11 | | Docker | `v19.x` | | | |
12 | | Docker Compose | `v1.25.x` | | | |
13 |
14 | ### Getting started
15 |
16 | **1. Clone repo** [`https://github.com/codeuniversity/smag-mvp.git`](https://github.com/codeuniversity/smag-mvp.git)
17 |
18 | **2. Switch to the front-end folder** `cd smag-mvp/frontend`
19 |
20 | **3. Install all dependencies** `npm install`
21 |
22 | **4. Run the application in development mode** `npm start` (runs on `localhost:3000`)
23 |
24 | **5. To deploy to production you can create a minified bundle** `npm run build`
25 |
26 | **6. Run all services in docker to locally test the application.**
27 |
28 | 1. Start all services `make run`
29 | 2. Add `127.0.0.1 my-kafka` and `127.0.0.1 minio` to your `/etc/hosts` file
30 | 3. Choose a user_name as a starting point and run `go run cli/main/main.go instagram <user_name>`
31 |
32 | ### React Component Design
33 |
34 | | Name | Prop Name | Data Structure | Example | Description |
35 | | --------------- | ------------ | -------------- | --------------------- | ------------------------------------------------------------------ |
36 | | `<H1/>` | h1 | String | `""` | Displays the h1 headline in the App component and Start component. |
37 | | `<IGPost/>` | key | Function | `(post.shortcode)` | Displays the instagram post in the Result component. |
38 | | `<Form/>` | onSubmit | Function | `(name.string)=>void` | Displays the search form in the App component. |
39 | | `<Camera-Feed/>` | onFileSubmit | Function | `(file:File)=>void` | Implements the camera feed in the Start component. |
40 |
--------------------------------------------------------------------------------
/frontend/public/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
10 |
15 |
20 |
25 |
30 |
35 |
40 |
45 |
50 |
56 |
62 |
68 |
74 |
75 |
76 |
77 |
78 |
79 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 | SMAG
91 |
92 |
93 |
94 |
95 |
96 |
97 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | We are always happy about support for our project. If you decide to help us spread awareness for privacy, just start with the following steps:
4 | 1) Fork the project
5 | 2) Create a new branch
6 | 3) Commit your changes
7 | 4) Open a pull request to `master`
8 |
9 | (More about this in ["Branching & Naming"](#branching--naming))
10 |
11 | For your Git commit messages, please follow the guidance in this [article](https://chris.beams.io/posts/git-commit/):
12 |
13 | > - Limit the subject line to 50 characters
14 | > - Capitalize the subject line
15 | > - Use the imperative mood in the subject line
16 | > - Use the body to explain what and why, less how
17 |
18 | ### Pull Requests
19 |
20 | Every new feature must be developed on a feature branch and merged into master. **Please do not push directly to master!** We also provide a [Pull Request Template](https://github.com/codeuniversity/smag-mvp/blob/master/.github/pull_request_template.md) for additional guidance. In any case, the Pull Request has to be reviewed and approved by at least one other developer before merging. Please make sure to [reference the associated issue(s)](https://help.github.com/en/github/managing-your-work-on-github/closing-issues-using-keywords) in the pull request.
21 |
22 | ## Branching & Naming
23 |
24 | Next to `feature/` and `fix/` branches, we also have a `master` and a `production` branch. `master` is our development branch where new code is merged first; release versions for roll-out are merged into `production`.
25 |
26 | For the naming of software components, please follow the conventions of the existing components and folders. Especially if you build something specific to one of the platforms (e.g. `insta(gram)`, `twitter`, ...), please make sure to use the corresponding prefix for folder names. Otherwise, please stick to the formatting conventions for [Golang](https://golang.org/doc/effective_go.html), [Python](https://www.python.org/dev/peps/pep-0008/) and [React.js](https://hackernoon.com/structuring-projects-and-naming-components-in-react-1261b6e18d76).
27 |
28 | ## Task Management
29 |
30 | For our task management, we are using the [ZenHub GitHub Extension](https://www.zenhub.com/extension), which integrates a project board into GitHub. After installing the extension and reloading your browser, you will see an additional `ZenHub` tab in our repo. There you can see the current tasks (`"In Progress"`) and upcoming tasks (`"ToDo"`) of the release we are working on. All tasks are represented as GitHub issues as well, so you might want to [create your own GitHub issue](https://github.com/codeuniversity/smag-mvp/issues/new/choose) to get started.
31 |
32 | If you have any questions or want to get more involved in the project, feel free to approach the team via: [socialrecord-project[at]code.berlin](mailto:socialrecord-project@code.berlin).
33 |
--------------------------------------------------------------------------------
/insta/indexer/comments/insta_comments_indexer.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 | "strconv"
6 |
7 | "github.com/codeuniversity/smag-mvp/elastic"
8 | "github.com/codeuniversity/smag-mvp/elastic/indexer"
9 | "github.com/codeuniversity/smag-mvp/elastic/models"
10 | "github.com/codeuniversity/smag-mvp/kafka/changestream"
11 | "github.com/codeuniversity/smag-mvp/service"
12 | "github.com/codeuniversity/smag-mvp/utils"
13 | )
14 |
15 | func main() {
16 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "my-kafka:9092")
17 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID")
18 | bulkChunkSize := utils.GetNumberFromEnvWithDefault("BULK_CHUNK_SIZE", 10)
19 | changesTopic := utils.GetStringFromEnvWithDefault("KAFKA_CHANGE_TOPIC", "postgres.public.posts")
20 | bulkFetchTimeoutSeconds := utils.GetNumberFromEnvWithDefault("BULK_FETCH_TIMEOUT_SECONDS", 5)
21 | esHosts := utils.GetMultipleStringsFromEnvWithDefault("ES_HOSTS", []string{"localhost:9201"})
22 |
23 | i := indexer.New(esHosts, elastic.CommentsIndex, elastic.CommentsIndexMapping, kafkaAddress, changesTopic, groupID, indexComment, bulkChunkSize, bulkFetchTimeoutSeconds)
24 |
25 | service.CloseOnSignal(i)
26 | waitUntilClosed := i.Start()
27 |
28 | waitUntilClosed()
29 | }
30 |
31 | func indexComment(m *changestream.ChangeMessage) (*indexer.BulkIndexDoc, error) {
32 | comment := &models.InstaComment{}
33 | err := json.Unmarshal(m.Payload.After, comment)
34 |
35 | if err != nil {
36 | return nil, err
37 | }
38 |
39 | switch m.Payload.Op {
40 | case "r", "c":
41 | return createBulkUpsertOperation(comment)
42 | }
43 |
44 | return nil, nil
45 | }
46 |
47 | func createBulkUpsertOperation(comment *models.InstaComment) (*indexer.BulkIndexDoc, error) {
48 | var bulkOperation = map[string]interface{}{
49 | "update": map[string]interface{}{
50 | "_id": comment.ID,
51 | "_index": elastic.CommentsIndex,
52 | },
53 | }
54 |
55 | bulkOperationJson, err := json.Marshal(bulkOperation)
56 | if err != nil {
57 | return nil, err
58 | }
59 |
60 | bulkOperationJson = append(bulkOperationJson, "\n"...)
61 | var commentUpsert = map[string]interface{}{
62 | "script": map[string]interface{}{
63 | "source": "ctx._source.comment = params.comment",
64 | "lang": "painless",
65 | "params": map[string]interface{}{
66 | "comment": comment.Comment,
67 | },
68 | },
69 | "upsert": map[string]interface{}{
70 | "post_id": comment.PostID,
71 | "comment": comment.Comment,
72 | },
73 | }
74 |
75 | commentUpsertJson, err := json.Marshal(commentUpsert)
76 |
77 | if err != nil {
78 | return nil, err
79 | }
80 |
81 | commentUpsertJson = append(commentUpsertJson, "\n"...)
82 |
83 | bulkUpsertBody := string(bulkOperationJson) + string(commentUpsertJson)
84 |
85 | return &indexer.BulkIndexDoc{DocumentId: strconv.Itoa(comment.ID), BulkOperation: bulkUpsertBody}, err
86 | }
87 |
--------------------------------------------------------------------------------
/neo4j/inserter/inserter.go:
--------------------------------------------------------------------------------
1 | package neo4jinserter
2 |
3 | import (
4 | "context"
5 | "encoding/json"
6 | "fmt"
7 | "log"
8 | "time"
9 |
10 | "github.com/codeuniversity/smag-mvp/kafka/changestream"
11 | "github.com/codeuniversity/smag-mvp/utils"
12 | "github.com/codeuniversity/smag-mvp/worker"
13 |
14 | "github.com/neo4j/neo4j-go-driver/neo4j"
15 | kf "github.com/segmentio/kafka-go"
16 | )
17 |
// Inserter represents the neo4j inserter containing all clients it uses
type Inserter struct {
	*worker.Worker

	// qReader is the kafka reader change messages are fetched from.
	qReader *kf.Reader
	// driver and session are created by initializeNeo4j; the session is the
	// one handed to inserterFunc on every step.
	driver  neo4j.Driver
	session neo4j.Session

	// inserterFunc performs the actual write for one change message.
	inserterFunc InserterFunc
}

// InserterFunc is responsible for unmarshaling the
// needed data from the change message and inserting
// it into neo4j
type InserterFunc func(*changestream.ChangeMessage, neo4j.Session) error
33 |
34 | // New returns an initilized scraper
35 | func New(neo4jConfig *utils.Neo4jConfig, userQReader *kf.Reader, inserterFunc InserterFunc) *Inserter {
36 | i := &Inserter{}
37 |
38 | i.qReader = userQReader
39 | i.inserterFunc = inserterFunc
40 |
41 | session, driver, err := initializeNeo4j(neo4jConfig)
42 | if err != nil {
43 | panic(err)
44 | }
45 | i.session = session
46 | i.driver = driver
47 |
48 | log.Println("✅ Neo4j Connection established")
49 |
50 | i.Worker = worker.Builder{}.WithName("neo4j-inserter").
51 | WithWorkStep(i.runStep).
52 | WithStopTimeout(10*time.Second).
53 | AddShutdownHook("userQReader", userQReader.Close).
54 | AddShutdownHook("Neo4j Driver", driver.Close).
55 | AddShutdownHook("Neo4j Session", session.Close).
56 | MustBuild()
57 |
58 | return i
59 | }
60 |
61 | // runStep the inserter
62 | func (i *Inserter) runStep() error {
63 | m, err := i.qReader.FetchMessage(context.Background())
64 |
65 | if err != nil {
66 | return err
67 | }
68 |
69 | changeMessage := &changestream.ChangeMessage{}
70 |
71 | err = json.Unmarshal(m.Value, changeMessage)
72 |
73 | if err != nil {
74 | return err
75 | }
76 |
77 | err = i.inserterFunc(changeMessage, i.session)
78 |
79 | if err != nil {
80 | return err
81 | }
82 |
83 | log.Println("Inserted")
84 | return i.qReader.CommitMessages(context.Background(), m)
85 | }
86 |
87 | //initializeNeo4j sets connection and constraints for neo4j
88 | func initializeNeo4j(config *utils.Neo4jConfig) (neo4j.Session, neo4j.Driver, error) {
89 | address := fmt.Sprintf("bolt://%s:7687", config.Host)
90 | driver, err := neo4j.NewDriver(address, neo4j.BasicAuth(config.Username, config.Password, ""))
91 | if err != nil {
92 | return nil, nil, err
93 | }
94 |
95 | session, err := driver.Session(neo4j.AccessModeWrite)
96 | if err != nil {
97 | return nil, nil, err
98 | }
99 |
100 | _, err = session.Run("CREATE CONSTRAINT ON (U:USER) ASSERT U.id IS UNIQUE", nil)
101 | if err != nil {
102 | return nil, nil, err
103 | }
104 |
105 | return session, driver, nil
106 | }
107 |
--------------------------------------------------------------------------------
/twitter/scraper/twitterscraper/scraper_manager.py:
--------------------------------------------------------------------------------
1 | import json
2 | import logging
3 | import traceback
4 |
5 | from kafka import KafkaProducer, KafkaConsumer
6 |
7 |
class ScraperManager(object):
    """Base class for kafka-driven scrapers.

    Reads user names from ``fetch_topic``, passes each one to ``scrape``
    (to be implemented by subclasses) and sends every scraped object to
    ``insert_topic`` as JSON.
    """

    name = "scraper_manager"

    def __init__(
        self,
        fetch_topic: str,
        insert_topic: str,
        kafka_consumer_group: str = "scraper_manager",
        kafka_address: str = "localhost:9092",
    ):
        """Creates the kafka consumer for ``fetch_topic`` and the JSON producer."""
        self.consumer = KafkaConsumer(
            fetch_topic,
            bootstrap_servers=kafka_address,
            group_id=kafka_consumer_group,
            reconnect_backoff_ms=500,
            reconnect_backoff_max_ms=10000,
            max_poll_interval_ms=600000,
        )
        self.producer = KafkaProducer(
            bootstrap_servers=kafka_address,
            value_serializer=lambda v: json.dumps(v).encode('utf-8'),
            reconnect_backoff_ms=500,
            reconnect_backoff_max_ms=10000,
            request_timeout_ms=600000,
        )
        self.insert_topic = insert_topic

    def run(self):
        """Consumes forever; on an error flushes the producer and re-raises."""
        try:
            while True:
                self.consume_scrape_produce()
        except Exception:
            logging.error(
                "Caught error. Going to flush KafkaProducer and then throw error further."
            )
            self.producer.flush()
            raise

    def consume_scrape_produce(self) -> None:
        """
        Consumes from kafka,
        scrapes via custom function,
        and produces/sends scraped msges to kafka
        """

        for msg in self.consumer:
            user_name = msg.value.decode("utf-8")
            try:
                self.scrape_and_produce(user_name)
            except Exception:
                # A user that cannot be scraped is committed and logged so
                # that consumption continues with the next message.
                self.consumer.commit()
                traceback.print_exc()
                logging.error(f"Couldn't scrape user {user_name}. Continuing")

    def scrape_and_produce(self, user_name: str) -> None:
        """Scrapes ``user_name`` and sends every resulting object to kafka."""
        msg = self.scrape(user_name)
        # isinstance also accepts list subclasses; anything else is treated
        # as a single message.
        msg_list = msg if isinstance(msg, list) else [msg]
        for m in msg_list:
            self.produce(m)
        logging.info(
            f"Done sending {len(msg_list)} element(s) to kafka/{self.insert_topic}"
        )

    def scrape(self, user_name: str):
        """This method will be implemented by the user to scrape either user-profile or tweets"""
        raise NotImplementedError(
            "You need to implement a scrape(user_name: str) method, "
            "which returns an object to be written to kafka."
        )

    def produce(self, msg) -> None:
        """Sends the attribute dict of ``msg`` to the insert topic."""
        topic = self.insert_topic
        # The user name is only used for the debug line; a scraped object
        # without that attribute must not make the send fail.
        sender = getattr(msg, "username", "<unknown>")
        logging.debug(
            f"{self.name} sends msg (from {sender}) to kafka/{topic}"
        )
        msg_dict = vars(msg)
        self.producer.send(topic, msg_dict)
86 |
--------------------------------------------------------------------------------
/insta/indexer/users/insta_users_indexer.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 | "strconv"
6 |
7 | "github.com/codeuniversity/smag-mvp/elastic"
8 | "github.com/codeuniversity/smag-mvp/elastic/indexer"
9 | "github.com/codeuniversity/smag-mvp/elastic/models"
10 | "github.com/codeuniversity/smag-mvp/kafka/changestream"
11 | "github.com/codeuniversity/smag-mvp/service"
12 | "github.com/codeuniversity/smag-mvp/utils"
13 | )
14 |
15 | func main() {
16 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "my-kafka:9092")
17 | groupID := utils.GetStringFromEnvWithDefault("KAFKA_GROUPID", "insta_usersearch-inserter")
18 | changesTopic := utils.GetStringFromEnvWithDefault("KAFKA_CHANGE_TOPIC", "postgres.public.users")
19 | bulkChunkSize := utils.GetNumberFromEnvWithDefault("BULK_CHUNK_SIZE", 10)
20 | bulkFetchTimeoutSeconds := utils.GetNumberFromEnvWithDefault("BULK_FETCH_TIMEOUT_SECONDS", 5)
21 | esHosts := utils.GetMultipleStringsFromEnvWithDefault("ES_HOSTS", []string{"http://localhost:9201"})
22 |
23 | i := indexer.New(esHosts, elastic.UsersIndex, elastic.UsersIndexMapping, kafkaAddress, changesTopic, groupID, handleChangeMessage, bulkChunkSize, bulkFetchTimeoutSeconds)
24 |
25 | service.CloseOnSignal(i)
26 | waitUntilClosed := i.Start()
27 |
28 | waitUntilClosed()
29 | }
30 |
31 | // handleChangeMessage filters relevant events and upserts them
32 | func handleChangeMessage(m *changestream.ChangeMessage) (*indexer.BulkIndexDoc, error) {
33 | user := &models.InstaUser{}
34 | if err := json.Unmarshal(m.Payload.After, user); err != nil {
35 | return nil, err
36 | }
37 |
38 | switch m.Payload.Op {
39 | case "c", "r", "u":
40 | return createBulkUpsertOperation(user)
41 | }
42 | return nil, nil
43 | }
44 |
45 | func createBulkUpsertOperation(user *models.InstaUser) (*indexer.BulkIndexDoc, error) {
46 | var bulkOperation = map[string]interface{}{
47 | "update": map[string]interface{}{
48 | "_id": user.ID,
49 | "_index": elastic.UsersIndex,
50 | },
51 | }
52 |
53 | bulkOperationJson, err := json.Marshal(bulkOperation)
54 |
55 | if err != nil {
56 | return nil, err
57 | }
58 |
59 | bulkOperationJson = append(bulkOperationJson, "\n"...)
60 |
61 | var usersUpsert = map[string]interface{}{
62 | "script": map[string]interface{}{
63 | "source": "ctx._source.user_name = params.user_name; ctx._source.real_name = params.real_name; ctx._source.bio = params.bio",
64 | "lang": "painless",
65 | "params": map[string]interface{}{
66 | "user_name": user.Username,
67 | "real_name": user.Realname,
68 | "bio": user.Bio,
69 | },
70 | },
71 | "upsert": map[string]interface{}{
72 | "user_name": user.Username,
73 | "real_name": user.Realname,
74 | "bio": user.Bio,
75 | },
76 | }
77 |
78 | usersUpsertJson, err := json.Marshal(usersUpsert)
79 |
80 | if err != nil {
81 | return nil, err
82 | }
83 |
84 | usersUpsertJson = append(usersUpsertJson, "\n"...)
85 |
86 | bulkUpsertBody := string(bulkOperationJson) + string(usersUpsertJson)
87 |
88 | return &indexer.BulkIndexDoc{DocumentId: strconv.Itoa(user.ID), BulkOperation: bulkUpsertBody}, err
89 | }
90 |
--------------------------------------------------------------------------------
/insta/indexer/posts/insta_posts_indexer.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 | "strconv"
6 |
7 | "github.com/codeuniversity/smag-mvp/elastic"
8 | "github.com/codeuniversity/smag-mvp/elastic/indexer"
9 | "github.com/codeuniversity/smag-mvp/elastic/models"
10 | "github.com/codeuniversity/smag-mvp/kafka/changestream"
11 | "github.com/codeuniversity/smag-mvp/service"
12 | "github.com/codeuniversity/smag-mvp/utils"
13 | )
14 |
15 | func main() {
16 | kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "my-kafka:9092")
17 | groupID := utils.MustGetStringFromEnv("KAFKA_GROUPID")
18 | bulkChunkSize := utils.GetNumberFromEnvWithDefault("BULK_CHUNK_SIZE", 10)
19 | changesTopic := utils.GetStringFromEnvWithDefault("KAFKA_CHANGE_TOPIC", "postgres.public.posts")
20 | bulkFetchTimeoutSeconds := utils.GetNumberFromEnvWithDefault("BULK_FETCH_TIMEOUT_SECONDS", 5)
21 | esHosts := utils.GetMultipleStringsFromEnvWithDefault("ES_HOSTS", []string{"localhost:9201"})
22 |
23 | i := indexer.New(esHosts, elastic.PostsIndex, elastic.PostsIndexMapping, kafkaAddress, changesTopic, groupID, indexPost, bulkChunkSize, bulkFetchTimeoutSeconds)
24 |
25 | service.CloseOnSignal(i)
26 | waitUntilClosed := i.Start()
27 |
28 | waitUntilClosed()
29 | }
30 |
31 | func indexPost(m *changestream.ChangeMessage) (*indexer.BulkIndexDoc, error) {
32 | currentPost := &models.InstaPost{}
33 | err := json.Unmarshal(m.Payload.After, currentPost)
34 |
35 | if err != nil {
36 | return nil, err
37 | }
38 |
39 | switch m.Payload.Op {
40 | case "r", "c":
41 | return createBulkUpsertOperation(currentPost)
42 | case "u":
43 | previousPost := &models.InstaPost{}
44 | err := json.Unmarshal(m.Payload.Before, previousPost)
45 |
46 | if err != nil {
47 | return nil, err
48 | }
49 |
50 | if previousPost.Caption != currentPost.Caption {
51 | return createBulkUpsertOperation(currentPost)
52 | }
53 | }
54 |
55 | return nil, nil
56 | }
57 |
58 | func createBulkUpsertOperation(post *models.InstaPost) (*indexer.BulkIndexDoc, error) {
59 | var bulkOperation = map[string]interface{}{
60 | "update": map[string]interface{}{
61 | "_id": post.ID,
62 | "_index": elastic.PostsIndex,
63 | },
64 | }
65 |
66 | bulkOperationJson, err := json.Marshal(bulkOperation)
67 |
68 | if err != nil {
69 | return nil, err
70 | }
71 |
72 | bulkOperationJson = append(bulkOperationJson, "\n"...)
73 |
74 | var commentUpsert = map[string]interface{}{
75 | "script": map[string]interface{}{
76 | "source": "ctx._source.caption = params.caption",
77 | "lang": "painless",
78 | "params": map[string]interface{}{
79 | "caption": post.Caption,
80 | },
81 | },
82 | "upsert": map[string]interface{}{
83 | "user_id": post.UserID,
84 | "caption": post.Caption,
85 | },
86 | }
87 |
88 | postUpsertJson, err := json.Marshal(commentUpsert)
89 |
90 | if err != nil {
91 | return nil, err
92 | }
93 |
94 | postUpsertJson = append(postUpsertJson, "\n"...)
95 | bulkUpsertBody := string(bulkOperationJson) + string(postUpsertJson)
96 |
97 | return &indexer.BulkIndexDoc{DocumentId: strconv.Itoa(post.ID), BulkOperation: bulkUpsertBody}, err
98 | }
99 |
--------------------------------------------------------------------------------
/utils/utils.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import (
4 | "fmt"
5 | "log"
6 | "os"
7 | "strconv"
8 | "strings"
9 | "time"
10 |
11 | "github.com/google/uuid"
12 | )
13 |
// WithRetries calls f until it succeeds, making at most `times` attempts.
//
// Every failed attempt is logged. Between two attempts the function pauses
// for 100ms; there is no pause after the final attempt, so the last error is
// returned without extra delay. It returns nil as soon as f succeeds, the
// error of the last attempt otherwise, and nil without calling f when times
// is zero or negative.
func WithRetries(times int, f func() error) error {
	const retryDelay = 100 * time.Millisecond

	var err error
	for attempt := 1; attempt <= times; attempt++ {
		if err = f(); err == nil {
			return nil
		}
		log.Println(err)

		// Sleeping only makes sense when another attempt follows.
		if attempt < times {
			time.Sleep(retryDelay)
		}
	}
	return err
}
27 |
// GetStringFromEnvWithDefault returns the value of the environment variable
// enVarName, or defaultValue when the variable is unset or empty.
func GetStringFromEnvWithDefault(enVarName, defaultValue string) string {
	if fromEnv := os.Getenv(enVarName); fromEnv != "" {
		return fromEnv
	}
	return defaultValue
}
37 |
// GetNumberFromEnvWithDefault returns the environment variable envVarName
// parsed as a base-10 int.
//
// defaultValue is returned when the variable is unset or empty, and also when
// its value is not a valid int (malformed or out of range for the platform's
// int). The latter case is logged so that a misconfiguration does not go
// unnoticed.
func GetNumberFromEnvWithDefault(envVarName string, defaultValue int) int {
	envValue := os.Getenv(envVarName)
	if envValue == "" {
		return defaultValue
	}

	// Atoi parses into the platform's int size, so an oversized value is
	// rejected instead of being silently truncated by a conversion.
	number, err := strconv.Atoi(envValue)
	if err != nil {
		log.Printf("ignoring invalid value %q for %s, using default %d: %v", envValue, envVarName, defaultValue, err)
		return defaultValue
	}

	return number
}
48 |
// MustGetStringFromEnv returns the value of the environment variable
// enVarName and panics when the variable is unset or empty.
func MustGetStringFromEnv(enVarName string) string {
	if fromEnv := os.Getenv(enVarName); fromEnv != "" {
		return fromEnv
	}
	panic(fmt.Sprintf("%s must not be empty", enVarName))
}
58 |
// GetMultipleStringsFromEnvWithDefault reads the environment variable
// envVarName as a comma separated list.
//
// Surrounding whitespace is trimmed from every entry and empty entries (for
// example from a trailing comma) are dropped, so "a, b," yields ["a", "b"].
// defaultValue is returned when the variable is unset or empty, or when it
// contains no non-empty entry.
func GetMultipleStringsFromEnvWithDefault(envVarName string, defaultValue []string) []string {
	envValue := os.Getenv(envVarName)
	if envValue == "" {
		return defaultValue
	}

	parts := strings.Split(envValue, ",")
	values := make([]string, 0, len(parts))
	for _, part := range parts {
		if trimmed := strings.TrimSpace(part); trimmed != "" {
			values = append(values, trimmed)
		}
	}

	// Something like "," or " " carries no usable entry; treat it as unset.
	if len(values) == 0 {
		return defaultValue
	}
	return values
}
67 |
// GetBoolFromEnvWithDefault returns the environment variable enVarName parsed
// with strconv.ParseBool. defaultValue is returned when the variable is unset
// or empty; a non-empty value that cannot be parsed causes a panic.
func GetBoolFromEnvWithDefault(enVarName string, defaultValue bool) bool {
	raw := os.Getenv(enVarName)
	if raw == "" {
		return defaultValue
	}

	parsed, err := strconv.ParseBool(raw)
	if err == nil {
		return parsed
	}
	panic(fmt.Errorf("couldn't parse %s as bool: %s", enVarName, err))
}
82 |
// PanicIfNotNil does nothing for a nil error and panics with err otherwise.
func PanicIfNotNil(err error) {
	if err == nil {
		return
	}
	//TODO: graceful shutdown
	panic(err)
}
90 |
// MustBeNil does nothing for a nil error and panics with err otherwise.
func MustBeNil(err error) {
	if err == nil {
		return
	}
	panic(err)
}
97 |
// ConvertIntToBool reports whether value equals 1; every other integer,
// including 0 and negative numbers, maps to false.
func ConvertIntToBool(value int) bool {
	return value == 1
}
105 |
// ConvertDateStrToTime parses dateStr using the layout "02 Jan 2006"
// (two-digit day, abbreviated month name, four-digit year) and returns the
// resulting time.Time together with any parse error.
func ConvertDateStrToTime(dateStr string) (time.Time, error) {
	const layout = "02 Jan 2006"

	parsed, err := time.Parse(layout, dateStr)
	return parsed, err
}
110 |
111 | // RandUUIDSeq returns a random uuid string
112 | func RandUUIDSeq() string {
113 | id, err := uuid.NewRandom()
114 | MustBeNil(err)
115 |
116 | return id.String()
117 | }
118 |
--------------------------------------------------------------------------------