├── .gitignore ├── LICENSE ├── README.md ├── data-analysis ├── Dockerfile ├── amazon-books-memgraph.py ├── art-blocks-analysis.py ├── art-blocks-memgraph.py ├── github-analysis.py ├── github-commits-memgraph.py ├── movielens-memgraph.py └── requirements.txt ├── datasets ├── amazon-books │ ├── Dockerfile │ ├── README.md │ ├── data │ │ └── books.csv │ ├── produce.py │ └── requirements.txt ├── art-blocks │ ├── Dockerfile │ ├── README.md │ ├── data │ │ ├── accounts.csv │ │ ├── create_csvs.py │ │ ├── projects.csv │ │ ├── projects_and_sales.json │ │ ├── sales.csv │ │ └── tokens.csv │ ├── produce.py │ └── requirements.txt ├── github │ ├── Dockerfile │ ├── README.md │ ├── data │ │ └── github-network.csv │ ├── produce.py │ ├── requirements.txt │ └── scraper │ │ ├── dependency_graph.py │ │ ├── requirements.txt │ │ └── scraper.py └── movielens │ ├── Dockerfile │ ├── README.md │ ├── data │ ├── movies.csv │ └── ratings.csv │ ├── produce.py │ └── requirements.txt ├── docker-compose.yml ├── kafka ├── Dockerfile ├── connect.properties ├── init.sh └── kafka_server_jaas.conf ├── memgraph ├── Dockerfile ├── import-data │ ├── accounts.csv │ ├── projects.csv │ └── tokens.csv ├── query_modules │ ├── amazon_books_analysis.py │ └── movielens_analysis.py └── transformations │ ├── amazon_books.py │ ├── artblocks.py │ ├── github_commits.py │ └── movielens.py ├── platform_variables.env ├── start.py └── stream ├── __init__.py ├── apache_pulsar.py ├── kafka_redpanda.py ├── producer.py └── rabbitmq.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # memgraph 132 | mg_lib/ 133 | mg_log/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 g-despot 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
<h1 align="center">:bar_chart: data-streams :bar_chart:</h1> 2 | <p align="center">Publicly available real-time data sets on Kafka, Redpanda, RabbitMQ & Apache Pulsar</p>
3 | 4 | ## :speech_balloon: About 5 | 6 | This project serves as a starting point for analyzing real-time streaming data. 7 | We have prepared a few cool datasets which can be streamed via Kafka, Redpanda, 8 | RabbitMQ, and Apache Pulsar. Right now, you can clone/fork the repo and start 9 | the service locally, but we will be adding publicly available clusters to which 10 | you can simply connect. 11 | 12 | ## :open_file_folder: Datasets 13 | 14 | Currently available datasets: 15 | 16 | - [Art Blocks](./datasets/art-blocks/data) 17 | - [GitHub](./datasets/github/data) 18 | - [MovieLens](./datasets/movielens/data) 19 | - [Amazon books](./datasets/amazon-books/data/) 20 | 21 | ## :fast_forward: How to start the streams? 22 | 23 | Place yourself in the root folder and run: 24 | 25 | ``` 26 | python3 start.py --platforms <platforms> --dataset <dataset> 27 | ``` 28 | 29 | The argument `<platforms>` can be: 30 | - `kafka`, 31 | - `redpanda`, 32 | - `rabbitmq` and/or 33 | - `pulsar`. 34 | 35 | The argument `<dataset>` can be: 36 | - `github`, 37 | - `art-blocks`, 38 | - `movielens` or 39 | - `amazon-books`. 40 | 41 | The script will start the chosen streaming platforms in Docker containers, and you will see messages from the chosen dataset being consumed. 42 | 43 | You can then connect Memgraph and stream the data into the database by running: 44 | ``` 45 | docker-compose up <dataset>-memgraph 46 | ``` 47 | 48 | For example, if you choose Kafka as the streaming platform and art-blocks as your dataset, you should run: 49 | ``` 50 | python3 start.py --platforms kafka --dataset art-blocks 51 | ``` 52 | 53 | > If you are a Windows user and the above command doesn't work, try replacing `python3` with `python`. 54 | 55 | Next, in a new terminal window run: 56 | ``` 57 | docker-compose up art-blocks-memgraph 58 | ``` 59 | 60 | ## :scroll: References 61 | 62 | There's no documentation yet, but it's coming soon! Throw us a star to keep up with upcoming changes.
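In the meantime, if you just want to peek at a running stream without Memgraph, here is a minimal consumer sketch, assuming the defaults used by the `data-analysis` scripts (local Kafka on `localhost:9093`, art-blocks `sales` topic); adjust the address and topic for your platform and dataset:

```
import json

from kafka import KafkaConsumer

# Defaults mirror data-analysis/art-blocks-analysis.py; for the other
# datasets the topic is likely 'github', 'ratings' or 'book-ratings'.
consumer = KafkaConsumer('sales',
                         bootstrap_servers='localhost:9093',
                         auto_offset_reset='earliest',
                         group_id=None)

for message in consumer:
    print(json.loads(message.value.decode('utf8')))
```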
63 | -------------------------------------------------------------------------------- /data-analysis/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | # Install CMake for gqlalchemy 4 | RUN apt-get update && \ 5 | apt-get --yes install cmake && \ 6 | rm -rf /var/lib/apt/lists/* 7 | 8 | # Install packages 9 | COPY /data-analysis/requirements.txt ./ 10 | RUN pip3 install -r requirements.txt 11 | 12 | COPY /data-analysis/ /app/ 13 | WORKDIR /app -------------------------------------------------------------------------------- /data-analysis/amazon-books-memgraph.py: -------------------------------------------------------------------------------- 1 | from gqlalchemy import Memgraph 2 | from pathlib import Path 3 | from time import sleep 4 | import logging 5 | import os 6 | 7 | log = logging.getLogger(__name__) 8 | 9 | MEMGRAPH_IP = os.getenv('MEMGRAPH_IP', 'memgraph-mage') 10 | MEMGRAPH_PORT = os.getenv('MEMGRAPH_PORT', '7687') 11 | 12 | 13 | def connect_to_memgraph(memgraph_ip, memgraph_port): 14 | memgraph = Memgraph(host=memgraph_ip, port=int(memgraph_port)) 15 | while(True): 16 | try: 17 | if (memgraph._get_cached_connection().is_active()): 18 | return memgraph 19 | except: 20 | log.info("Memgraph probably isn't running.") 21 | sleep(1) 22 | 23 | 24 | def set_stream(memgraph): 25 | log.info("Creating stream connections on Memgraph") 26 | memgraph.execute("CREATE KAFKA STREAM ratings_stream TOPICS book-ratings TRANSFORM amazon_books.book_ratings BOOTSTRAP_SERVERS 'kafka:9092' CREDENTIALS {'sasl.username':'public', 'sasl.password':'public', 'security.protocol':'SASL_PLAINTEXT', 'sasl.mechanism':'PLAIN'};") 27 | memgraph.execute("START STREAM ratings_stream;") 28 | 29 | # TODO: What to do when a new object is created 30 | """ 31 | log.info("Creating triggers on Memgraph") 32 | memgraph.execute( 33 | "CREATE TRIGGER...") 34 | """ 35 | 36 | 37 | def main(): 38 | memgraph = connect_to_memgraph(MEMGRAPH_IP, MEMGRAPH_PORT) 39 | set_stream(memgraph) 40 | 41 | 42 | if __name__ == "__main__": 43 | main() 44 | -------------------------------------------------------------------------------- /data-analysis/art-blocks-analysis.py: -------------------------------------------------------------------------------- 1 | import os 2 | from kafka import KafkaConsumer 3 | from time import sleep 4 | import json 5 | import pika 6 | import pulsar 7 | 8 | KAFKA_IP = os.getenv('KAFKA_IP', 'localhost') 9 | KAFKA_PORT = os.getenv('KAFKA_PORT', '9093') 10 | KAFKA_TOPIC = os.getenv('KAFKA_TOPIC', 'sales') 11 | REDPANDA_IP = os.getenv('REDPANDA_IP', 'localhost') 12 | REDPANDA_PORT = os.getenv('REDPANDA_PORT', '29092') 13 | REDPANDA_TOPIC = os.getenv('REDPANDA_TOPIC', 'sales') 14 | RABBITMQ_IP = os.getenv('RABBITMQ_IP', 'localhost') 15 | RABBITMQ_PORT = os.getenv('RABBITMQ_PORT', '5672') 16 | RABBITMQ_QUEUE = os.getenv('RABBITMQ_QUEUE', 'sales') 17 | PULSAR_IP = os.getenv('PULSAR_IP', 'localhost') 18 | PULSAR_PORT = os.getenv('PULSAR_PORT', '6650') 19 | PULSAR_TOPIC = os.getenv('PULSAR_TOPIC', 'sales') 20 | KAFKA = os.getenv('KAFKA', 'False') 21 | REDPANDA = os.getenv('REDPANDA', 'False') 22 | RABBITMQ = os.getenv('RABBITMQ', 'False') 23 | PULSAR = os.getenv('PULSAR', 'False') 24 | 25 | project_sales = dict() 26 | seler_sales = dict() 27 | buyer_sales = dict() 28 | best_sale = 0 29 | day_sales = dict() 30 | total_sales = 0 31 | 32 | 33 | def analyze(message): 34 | # ----TOTAL SALES---- 35 | global total_sales 36 | total_sales += 1 37 | print("Total number 
of sales: " + str(total_sales)) 38 | print("------------------------------------------------------") 39 | 40 | # ----BEST PROJECT---- 41 | global project_sales 42 | project_id = str(message["project_id"]) 43 | if project_id in project_sales: 44 | project_sales[project_id] += 1 45 | else: 46 | project_sales[project_id] = 1 47 | best_project = max(project_sales, key=project_sales.get) 48 | print("Project with largest number of sales: " + str(best_project)) 49 | print("Number of sales: " + str(project_sales[best_project])) 50 | print("------------------------------------------------------") 51 | 52 | # ----BEST SELLER---- 53 | global seler_sales 54 | seller_id = str(message["seller_id"]) 55 | if seller_id in seler_sales: 56 | seler_sales[seller_id] += 1 57 | else: 58 | seler_sales[seller_id] = 1 59 | best_seller = max(seler_sales, key=seler_sales.get) 60 | print("Seller with largest number of sales: " + str(best_seller)) 61 | print("Number of sales: " + str(seler_sales[best_seller])) 62 | print("------------------------------------------------------") 63 | 64 | # ----BEST BUYER---- 65 | global buyer_sales 66 | buyer_id = str(message["buyer_id"]) 67 | if buyer_id in buyer_sales: 68 | buyer_sales[buyer_id] += 1 69 | else: 70 | buyer_sales[buyer_id] = 1 71 | best_buyer = max(buyer_sales, key=buyer_sales.get) 72 | print("Buyer with largest number of sales: " + str(best_buyer)) 73 | print("Number of buys: " + str(buyer_sales[best_buyer])) 74 | print("------------------------------------------------------") 75 | 76 | # ----BEST SALE---- 77 | global best_sale 78 | price = int(message["price"]) 79 | if price > best_sale: 80 | best_sale = price 81 | print("Best sale price is: " + str(best_sale)) 82 | print("Sale id: " + str(message["sale_id"])) 83 | print("------------------------------------------------------") 84 | 85 | # ----BEST DAY---- 86 | global day_sales 87 | datetime = str(message["datetime"]) 88 | date = datetime.split(" ")[0] 89 | if date in day_sales: 90 | day_sales[date] += 1 91 | else: 92 | day_sales[date] = 1 93 | best_day = max(day_sales, key=day_sales.get) 94 | print("Day with largest number of sales: " + str(best_day)) 95 | print("Number of sales: " + str(day_sales[best_day])) 96 | print("------------------------------------------------------") 97 | 98 | 99 | def consume_kafka_redpanda(ip, port, topic, platform): 100 | print("Running kafka consumer") 101 | consumer = KafkaConsumer(topic, 102 | bootstrap_servers=ip + ':' + port, 103 | auto_offset_reset='earliest', 104 | group_id=None) 105 | try: 106 | while True: 107 | msg_pack = consumer.poll() 108 | if not msg_pack: 109 | sleep(1) 110 | continue 111 | for _, messages in msg_pack.items(): 112 | for message in messages: 113 | message = json.loads(message.value.decode('utf8')) 114 | print(platform, " :", str(message)) 115 | analyze(message) 116 | 117 | except KeyboardInterrupt: 118 | pass 119 | 120 | 121 | def consume_rabbitmq(ip, port, queue, platform): 122 | connection = pika.BlockingConnection( 123 | pika.ConnectionParameters(host=ip)) 124 | channel = connection.channel() 125 | 126 | channel.queue_declare(queue=queue) 127 | 128 | def callback(ch, method, properties, body): 129 | print(platform, ": ", str(body)) 130 | 131 | channel.basic_consume( 132 | queue=queue, on_message_callback=callback, auto_ack=True) 133 | 134 | print(' [*] Waiting for messages. 
To exit press CTRL+C') 135 | channel.start_consuming() 136 | 137 | 138 | def consume_pulsar(ip, port, topic, platform): 139 | client = pulsar.Client('pulsar://' + ip + ':' + port) 140 | 141 | consumer = client.subscribe(topic, 'my-subscription') 142 | 143 | while True: 144 | msg = consumer.receive() 145 | try: 146 | print(platform, ": ", msg.data()) 147 | # Acknowledge successful processing of the message 148 | consumer.acknowledge(msg) 149 | except: 150 | # Message failed to be processed 151 | consumer.negative_acknowledge(msg) 152 | client.close() 153 | 154 | 155 | def main(): 156 | if KAFKA == 'True': 157 | consume_kafka_redpanda(KAFKA_IP, KAFKA_PORT, KAFKA_TOPIC, "Kafka") 158 | elif REDPANDA == 'True': 159 | consume_kafka_redpanda(REDPANDA_IP, REDPANDA_PORT, 160 | REDPANDA_TOPIC, "Redpanda") 161 | elif RABBITMQ == 'True': 162 | consume_rabbitmq(RABBITMQ_IP, RABBITMQ_PORT, RABBITMQ_QUEUE, "RabbitMQ") 163 | elif PULSAR == 'True': 164 | consume_pulsar(PULSAR_IP, PULSAR_PORT, PULSAR_TOPIC, "Pulsar") 165 | 166 | 167 | if __name__ == "__main__": 168 | main() 169 | -------------------------------------------------------------------------------- /data-analysis/art-blocks-memgraph.py: -------------------------------------------------------------------------------- 1 | from gqlalchemy import Memgraph 2 | from pathlib import Path 3 | from time import sleep 4 | import logging 5 | import os 6 | 7 | log = logging.getLogger(__name__) 8 | 9 | MEMGRAPH_IP = os.getenv('MEMGRAPH_IP', 'memgraph-mage') 10 | MEMGRAPH_PORT = os.getenv('MEMGRAPH_PORT', '7687') 11 | 12 | 13 | def connect_to_memgraph(memgraph_ip, memgraph_port): 14 | memgraph = Memgraph(host=memgraph_ip, port=int(memgraph_port)) 15 | while(True): 16 | try: 17 | if (memgraph._get_cached_connection().is_active()): 18 | return memgraph 19 | except: 20 | log.info("Memgraph probably isn't running.") 21 | sleep(1) 22 | 23 | 24 | def load_artblocks_data(memgraph): 25 | memgraph.drop_database() 26 | path_projects = Path("/usr/lib/memgraph/import-data/projects.csv") 27 | path_accounts = Path("/usr/lib/memgraph/import-data/accounts.csv") 28 | path_tokens = Path("/usr/lib/memgraph/import-data/tokens.csv") 29 | 30 | log.info("Loading projects...") 31 | memgraph.execute( 32 | f"""LOAD CSV FROM "{path_projects}" 33 | WITH HEADER DELIMITER "," AS row 34 | CREATE (p:Project {{project_id: row.project_id, project_name: row.project_name, active: row.active, complete: row.complete, locked: row.locked, website: row.website}}) 35 | MERGE (c:Contract {{contract_id: row.contract_id}}) 36 | CREATE (p)-[:IS_ON]->(c);""" 37 | ) 38 | 39 | memgraph.execute(f"""CREATE INDEX ON :Project(project_id);""") 40 | 41 | log.info("Loading accounts...") 42 | memgraph.execute( 43 | f"""LOAD CSV FROM "{path_accounts}" 44 | WITH HEADER DELIMITER "," AS row 45 | MATCH (p:Project) WHERE p.project_id = row.project_id 46 | MERGE (a:Account {{account_id: row.account_id, account_name: row.account_name}}) 47 | CREATE (a)-[:CREATES]->(p);""" 48 | ) 49 | 50 | memgraph.execute(f"""CREATE INDEX ON :Account(account_id);""") 51 | 52 | log.info("Loading tokens...") 53 | memgraph.execute( 54 | f"""LOAD CSV FROM "{path_tokens}" 55 | WITH HEADER DELIMITER "," AS row 56 | MERGE (p:Project {{project_id: row.project_id}}) 57 | MERGE (a:Account {{account_id: row.owner_id}}) 58 | CREATE (t:Token {{token_id: row.token_id, created_at: row.created_at}}) 59 | CREATE (t)-[:IS_PART_OF]->(p) 60 | CREATE (a)-[:MINTS]->(t);""" 61 | ) 62 | 63 | memgraph.execute(f"""CREATE INDEX ON :Token(token_id);""") 64 | 65 | 66 | 
def set_stream(memgraph): 67 | log.info("Creating stream connections on Memgraph") 68 | memgraph.execute( 69 | "CREATE PULSAR STREAM sales_stream TOPICS sales TRANSFORM artblocks.sales SERVICE_URL 'pulsar://pulsar:6650'") 70 | memgraph.execute("START STREAM sales_stream") 71 | 72 | # TODO: What to do when a new object is created 73 | """ 74 | log.info("Creating triggers on Memgraph") 75 | memgraph.execute( 76 | "CREATE TRIGGER...") 77 | """ 78 | 79 | 80 | def main(): 81 | memgraph = connect_to_memgraph(MEMGRAPH_IP, MEMGRAPH_PORT) 82 | load_artblocks_data(memgraph) 83 | set_stream(memgraph) 84 | 85 | 86 | if __name__ == "__main__": 87 | main() 88 | -------------------------------------------------------------------------------- /data-analysis/github-analysis.py: -------------------------------------------------------------------------------- 1 | import os 2 | from kafka import KafkaConsumer 3 | from time import sleep 4 | import json 5 | import pika 6 | import pulsar 7 | 8 | KAFKA_IP = os.getenv('KAFKA_IP', 'localhost') 9 | KAFKA_PORT = os.getenv('KAFKA_PORT', '9093') 10 | KAFKA_TOPIC = os.getenv('KAFKA_TOPIC', 'github') 11 | REDPANDA_IP = os.getenv('REDPANDA_IP', 'localhost') 12 | REDPANDA_PORT = os.getenv('REDPANDA_PORT', '29092') 13 | REDPANDA_TOPIC = os.getenv('REDPANDA_TOPIC', 'github') 14 | RABBITMQ_IP = os.getenv('RABBITMQ_IP', 'localhost') 15 | RABBITMQ_PORT = os.getenv('RABBITMQ_PORT', '5672') 16 | RABBITMQ_QUEUE = os.getenv('RABBITMQ_QUEUE', 'github') 17 | PULSAR_IP = os.getenv('PULSAR_IP', 'localhost') 18 | PULSAR_PORT = os.getenv('PULSAR_PORT', '6650') 19 | PULSAR_TOPIC = os.getenv('PULSAR_TOPIC', 'github') 20 | KAFKA = os.getenv('KAFKA', 'False') 21 | REDPANDA = os.getenv('REDPANDA', 'False') 22 | RABBITMQ = os.getenv('RABBITMQ', 'False') 23 | PULSAR = os.getenv('PULSAR', 'False') 24 | 25 | 26 | def consume_kafka_redpanda(ip, port, topic, platform): 27 | print("Running kafka consumer") 28 | total = 0 29 | consumer = KafkaConsumer(topic, 30 | bootstrap_servers=ip + ':' + port, 31 | auto_offset_reset='earliest', 32 | group_id=None) 33 | try: 34 | while True: 35 | msg_pack = consumer.poll() 36 | if not msg_pack: 37 | sleep(1) 38 | continue 39 | for _, messages in msg_pack.items(): 40 | for message in messages: 41 | message = json.loads(message.value.decode('utf8')) 42 | print(platform, " :", str(message)) 43 | total += 1 44 | print("Total number of messages: " + str(total)) 45 | 46 | except KeyboardInterrupt: 47 | pass 48 | 49 | 50 | def consume_rabbitmq(ip, port, queue, platform): 51 | connection = pika.BlockingConnection( 52 | pika.ConnectionParameters(host=ip)) 53 | channel = connection.channel() 54 | 55 | channel.queue_declare(queue=queue) 56 | 57 | def callback(ch, method, properties, body): 58 | print(platform, ": ", str(body)) 59 | 60 | channel.basic_consume( 61 | queue=queue, on_message_callback=callback, auto_ack=True) 62 | 63 | print(' [*] Waiting for messages. 
To exit press CTRL+C') 64 | channel.start_consuming() 65 | 66 | 67 | def consume_pulsar(ip, port, topic, platform): 68 | client = pulsar.Client('pulsar://' + ip + ':' + port) 69 | 70 | consumer = client.subscribe(topic, 'my-subscription') 71 | 72 | while True: 73 | msg = consumer.receive() 74 | try: 75 | print(platform, ": ", msg.data()) 76 | # Acknowledge successful processing of the message 77 | consumer.acknowledge(msg) 78 | except: 79 | # Message failed to be processed 80 | consumer.negative_acknowledge(msg) 81 | client.close() 82 | 83 | 84 | def main(): 85 | if KAFKA == 'True': 86 | consume_kafka_redpanda(KAFKA_IP, KAFKA_PORT, KAFKA_TOPIC, "Kafka") 87 | elif REDPANDA == 'True': 88 | consume_kafka_redpanda(REDPANDA_IP, REDPANDA_PORT, 89 | REDPANDA_TOPIC, "Redpanda") 90 | elif RABBITMQ == 'True': 91 | consume_rabbitmq(RABBITMQ_IP, RABBITMQ_PORT, RABBITMQ_QUEUE, "RabbitMQ") 92 | elif PULSAR == 'True': 93 | consume_pulsar(PULSAR_IP, PULSAR_PORT, PULSAR_TOPIC, "Pulsar") 94 | 95 | 96 | if __name__ == "__main__": 97 | main() 98 | -------------------------------------------------------------------------------- /data-analysis/github-commits-memgraph.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from gqlalchemy import Memgraph 5 | from time import sleep 6 | 7 | 8 | log = logging.getLogger(__name__) 9 | 10 | MEMGRAPH_IP = os.getenv('MEMGRAPH_IP', 'memgraph-mage') 11 | MEMGRAPH_PORT = os.getenv('MEMGRAPH_PORT', '7687') 12 | 13 | 14 | def connect_to_memgraph(memgraph_ip, memgraph_port): 15 | memgraph = Memgraph(host=memgraph_ip, port=int(memgraph_port)) 16 | while(True): 17 | try: 18 | if (memgraph._get_cached_connection().is_active()): 19 | return memgraph 20 | except: 21 | log.info("Memgraph probably isn't running.") 22 | sleep(1) 23 | 24 | 25 | def set_stream(memgraph): 26 | log.info("Creating stream connections on Memgraph") 27 | memgraph.execute("""CREATE KAFKA STREAM github_commits 28 | TOPICS github 29 | TRANSFORM github_commits.commit 30 | BOOTSTRAP_SERVERS '54.74.181.194:9093' 31 | CREDENTIALS {'sasl.username':'public', 32 | 'sasl.password':'public', 33 | 'security.protocol':'SASL_PLAINTEXT', 34 | 'sasl.mechanism':'PLAIN'};""") 35 | memgraph.execute("START STREAM github_commits;") 36 | 37 | # TODO: What to do when a new object is created 38 | """ 39 | log.info("Creating triggers on Memgraph") 40 | memgraph.execute( 41 | "CREATE TRIGGER...") 42 | """ 43 | 44 | 45 | def main(): 46 | memgraph = connect_to_memgraph(MEMGRAPH_IP, MEMGRAPH_PORT) 47 | set_stream(memgraph) 48 | 49 | 50 | if __name__ == "__main__": 51 | main() 52 | -------------------------------------------------------------------------------- /data-analysis/movielens-memgraph.py: -------------------------------------------------------------------------------- 1 | from gqlalchemy import Memgraph 2 | from pathlib import Path 3 | from time import sleep 4 | import logging 5 | import os 6 | 7 | log = logging.getLogger(__name__) 8 | 9 | MEMGRAPH_IP = os.getenv('MEMGRAPH_IP', 'memgraph-mage') 10 | MEMGRAPH_PORT = os.getenv('MEMGRAPH_PORT', '7687') 11 | 12 | 13 | def connect_to_memgraph(memgraph_ip, memgraph_port): 14 | memgraph = Memgraph(host=memgraph_ip, port=int(memgraph_port)) 15 | while(True): 16 | try: 17 | if (memgraph._get_cached_connection().is_active()): 18 | return memgraph 19 | except: 20 | log.info("Memgraph probably isn't running.") 21 | sleep(1) 22 | 23 | 24 | def set_stream(memgraph): 25 | log.info("Creating stream connections on Memgraph") 
26 | memgraph.execute( 27 | "CREATE PULSAR STREAM ratings_stream TOPICS ratings TRANSFORM movielens.rating SERVICE_URL 'pulsar://pulsar:6650'") 28 | memgraph.execute("START STREAM ratings_stream") 29 | 30 | # TODO: What to do when a new object is created 31 | """ 32 | log.info("Creating triggers on Memgraph") 33 | memgraph.execute( 34 | "CREATE TRIGGER...") 35 | """ 36 | 37 | 38 | def main(): 39 | memgraph = connect_to_memgraph(MEMGRAPH_IP, MEMGRAPH_PORT) 40 | set_stream(memgraph) 41 | 42 | 43 | if __name__ == "__main__": 44 | main() 45 | -------------------------------------------------------------------------------- /data-analysis/requirements.txt: -------------------------------------------------------------------------------- 1 | kafka-python==2.0.2 2 | pika==1.2.0 3 | pulsar-client==2.8.1 4 | gqlalchemy==1.0.5 -------------------------------------------------------------------------------- /datasets/amazon-books/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | # Install packages 4 | COPY /datasets/amazon-books/requirements.txt ./ 5 | RUN pip3 install -r requirements.txt 6 | 7 | COPY /datasets/amazon-books/ /app/ 8 | COPY /stream /app/stream/ 9 | WORKDIR /app 10 | -------------------------------------------------------------------------------- /datasets/amazon-books/README.md: -------------------------------------------------------------------------------- 1 |
<h1 align="center">:bar_chart: Amazon book rating stream :bar_chart:</h1>
2 | 3 | ## :speech_balloon: About 4 | 5 | Amazon started as a web store for music, videos, and books. It has since 6 | grown into a technology giant that sells a wide range of products and services. 7 | Today, Amazon's e-commerce platform is one of the biggest in the world. According to 8 | Amazon's own data, at the time of writing they receive 1.6 million orders daily. That 9 | scale alone makes their e-commerce data worth analyzing. 10 | 11 | ## :open_file_folder: Dataset 12 | 13 | Books played a vital role in scaling Amazon's business, and book sales still make up a large 14 | part of daily sales, which makes them a natural choice for a streaming dataset. 15 | The dataset consists of user book ratings collected from the [Amazon dataset](https://jmcauley.ucsd.edu/data/amazon). 16 | 17 | ## :fast_forward: Streaming book ratings 18 | 19 | Check the instructions in the root [README](../../README.md) on how to start the data stream. 20 | 21 | ## :scroll: References 22 | 23 | Data source: [Amazon dataset](https://jmcauley.ucsd.edu/data/amazon) 24 | -------------------------------------------------------------------------------- /datasets/amazon-books/produce.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import stream.producer as producer 3 | 4 | DATA = "data/books.csv" 5 | 6 | def generate(): 7 | while True: 8 | with open(DATA) as file: 9 | csvReader = csv.DictReader(file) 10 | for row in csvReader: 11 | data = { 12 | 'bookId': row['bookId'], 13 | 'userId': row['userId'], 14 | 'rating': row['rating'], 15 | 'timestamp': row['timestamp'], 16 | 'title': row['title'] 17 | } 18 | yield data 19 | 20 | def main(): 21 | producer.run(generate) 22 | 23 | 24 | if __name__ == "__main__": 25 | main() -------------------------------------------------------------------------------- /datasets/amazon-books/requirements.txt: -------------------------------------------------------------------------------- 1 | kafka-python==2.0.2 2 | pika==1.2.0 3 | pulsar-client==2.10.0 4 | -------------------------------------------------------------------------------- /datasets/art-blocks/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | # Install packages 4 | COPY /datasets/art-blocks/requirements.txt ./ 5 | RUN pip3 install -r requirements.txt 6 | 7 | COPY /datasets/art-blocks/ /app/ 8 | COPY /stream /app/stream/ 9 | WORKDIR /app 10 | -------------------------------------------------------------------------------- /datasets/art-blocks/README.md: -------------------------------------------------------------------------------- 1 | 
<h1 align="center">:bar_chart: Art Blocks Streaming :bar_chart:</h1>
2 | 3 | ## :speech_balloon: About 4 | 5 | Art Blocks is a first of its kind platform focused on genuinely programmable on 6 | demand generative content that is stored immutably on the Ethereum Blockchain. 7 | You pick a style that you like, pay for the work, and a randomly generated 8 | version of the content is created by an algorithm and sent to your Ethereum 9 | account. The resulting piece might be a static image, 3D model, or an 10 | interactive experience. Each output is different and there are endless 11 | possibilities for the types of content that can be created on the platform. 12 | [[1]](#1) 13 | 14 | ## :open_file_folder: Dataset 15 | 16 | File `projects_and_sales.json` was obtained from Art Blocks 17 | [playground](https://thegraph.com/explorer/subgraph?id=0x3c3cab03c83e48e2e773ef5fc86f52ad2b15a5b0-0&view=Playground). 18 | To create `projects.csv`, `accounts.csv`, `tokens.csv` and `sales.csv` files 19 | place yourself into the `data` directory and run: 20 | 21 | ``` 22 | python3 create_csvs.py 23 | ``` 24 | 25 | ## :fast_forward: Streaming sales 26 | 27 | Check the instructions in the root [README](../../README.md). 28 | 29 | ## :scroll: References 30 | 31 | [1] Learn about art blocks at their 32 | [website](https://www.artblocks.io/learn). 33 | -------------------------------------------------------------------------------- /datasets/art-blocks/data/accounts.csv: -------------------------------------------------------------------------------- 1 | "project_id","account_id","account_name" 2 | "0x059edd72cd353df5106d2b9cc5ab83a52287ac3a-0","0xb998a2520907ed1fc0f9f457b2219fb2720466cd","Snowfro" 3 | "0x059edd72cd353df5106d2b9cc5ab83a52287ac3a-1","0xa9da6d2b707674a1cf5c3fbdee94c903b030d4e3","DCA" 4 | "0x059edd72cd353df5106d2b9cc5ab83a52287ac3a-2","0x7d42611012fdbe366bf4a0481fc0e1abf15e245a","Jeff Davis" 5 | "0x28f2d3805652fb5d359486dffb7d08320d403240-0","0x2bc66765dce0e3f4878d78a8cc50cfcb9563b8ec","" 6 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-10","0x1e8e749b2b578e181ca01962e9448006772b24a2","Bryan Brinkman" 7 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-100","0x457ee5f723c7606c12a7264b52e285906f91eea6","Casey REAS" 8 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-101","0xecc6043947fd65a7ba93e755d42594c7c7bc2cdb","Generative Artworks" 9 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-102","0x47144372eb383466d18fc91db9cd0396aa6c87a4","Steve Pikelny" 10 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-103","0x9eef6bcbff2b8a77869597842b09ac9d401811b5","Rich Lord" 11 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-104","0x7e6d4810ea233d7588e3675d704571e29c4bcbba","Radix" 12 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-105","0xe394104f0871b6f1fd46b2de688c3ea6f4cc84dd","Mark Cotton" 13 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-106","0x9e192409efff9300432d089e9e3a6183cc26e5c0","Julien Gachadoat" 14 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-107","0x707502591380bcfa98175486a38c15ce90e82097","Artem Verkhovskiy x Andy Shaw" 15 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-108","0xf92bb2215684c353b4009395061ee7652883c365","Michael Connolly" 16 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-109","0xd6dd6961d3224958fcd306b76a991ab974ec1ebc","Jake Rockland" 17 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-11","0x35f64560c51c8772f75186a8931929589b7c8d80","Beervangeer" 18 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-110","0xaccbee638fedfe3650be1fa3182b428483db8369","JEANVASCRIPT" 19 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-111","0x29b2f895343cadfb3f5101bef6484b1f01c83dc9","Daniel Catt" 
20 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-112","0x4666fd1f89576e2d6fb3f2ecec5eefd3e1ba6b59","Shvembldr" 21 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-113","0xe18fc96ba325ef22746ada9a82d521845a2c16f8","hideo" 22 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-114","0xe88046be7445f9c21a3062131c166b45fb156110","Alida Sun" 23 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-115","0x07911f74c5ef0ce80c57ebbf52033774055baa0c","TheElephantNL" 24 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-116","0x44a1e2883f1e599664e511e6c1c7cc72d846f5fc","RVig" 25 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-117","0x87f669c0ee22c42be261dd74143e716748ba11ba","Jason Ting x Matt Bilfield" 26 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-118","0x4bf3805b23c99f8e0a5797e86fd0232a04a2a629","Mitchell F. Chan" 27 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-119","0x129eb023b2f879b4c7dc4b19e7877bda35789773","Joshua Bagley" 28 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-12","0xb033daedca113b0386eb3e8f4c72c79fc50ae32e","Zeblocks" 29 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-120","0x0f441cfad93287109f5ef834bf52f4aaaa8d8ffa","Rafaël Rozendaal" 30 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-121","0x161b79d4e135693361cb42b6a3e8067c8c34e744","Stefano Contiero" 31 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-122","0x93f7cb21d6904492b33e0df24008c8b13ce64380","Hevey" 32 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-123","0xfa0bf8ed3b94033129e061c968b3ec290c1d9e33","Hjalmar Åström" 33 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-124","0xda8457bcc1096b4c66316b0a40c165d681bf244c","nonfigurativ" 34 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-125","0xf359de2378bf25373a33a64e1f9b257673e3320c","steen & n-e-o" 35 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-126","0x65bd6a518c0d58d314034d519ce69b3e05a806e4","WAWAA" 36 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-127","0x51582e2c703b0d7c745c6a4ae0336c98c3c41802","Eliya Stein" 37 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-128","0xb783cd9f3e74d52b320904292e0fbe720d333d97","" 38 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-129","0xe452517f920950b5977bdc0387bedbe5253954c2","Darien Brito" 39 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-13","0xe0753cfcabb86c2828b79a3ddd4faf6af0db0eb4","Dmitri Cherniak" 40 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-130","0x2776621ff536af829919ab6cba8db434aeba43f9","Alexis André" 41 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-131","0xb783cd9f3e74d52b320904292e0fbe720d333d97","William Tan" 42 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-132","0xac80dc4112f7757c05d65c773e0803ae8af7b834","Superblob" 43 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-133","0xfbc78f494ad61d90f02a3258e527de1321095acb","Joshua Davis / PrayStation" 44 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-134","0xbd3527f0c0f6bd513f0a1560fc0108a291c82806","luxpris" 45 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-135","0x12c0a19094a79feb81ee74501e67e3215b53b7dc","Matty Mariansky" 46 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-136","0xb2a2d7eee0f6d9e0465d18e3ebdc7a3a78612cc0","Alexander Reben" 47 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-137","0x4666fd1f89576e2d6fb3f2ecec5eefd3e1ba6b59","Shvembldr" 48 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-138","0x9eef6bcbff2b8a77869597842b09ac9d401811b5","Rich Lord" 49 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-139","0x7e6d4810ea233d7588e3675d704571e29c4bcbba","Radix" 50 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-14","0x5f127b4323c0061768976ad34a1a2beb9db19886","pxlq" 51 | 
"0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-140","0x1f5743df7c907a74c8cc28fe0e27c575830ac6a6","Aluan Wang" 52 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-141","0x5a77b2d05afb67eacd8f9c7e98692dd1e2883cb3","Thomas Lin Pedersen" 53 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-142","0x9441295f5a5f77c090ae106f6724510f07fc4bca","k0ch" 54 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-143","0x7f5a0a6847fd0fa05c13cbc02f435047b429e37c","Loren Bednar" 55 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-144","0xa3e51498579db0f7bb1ec9e3093b2f44158e25a5","sgt_slaughtermelon & Tartaria Archivist" 56 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-145","0xb033daedca113b0386eb3e8f4c72c79fc50ae32e","Zeblocks" 57 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-146","0x7d42611012fdbe366bf4a0481fc0e1abf15e245a","Jeff Davis" 58 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-147","0x32f848b9436f6400e5fa1fd46e9b96f4541c0966","Anna Carreras" 59 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-148","0xc5e08104c19dafd00fe40737490da9552db5bfe5","berk" 60 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-149","0xd6dd6961d3224958fcd306b76a991ab974ec1ebc","Jake Rockland" 61 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-15","0x3f870d006185cb649c3261013fd86cc89b762f1e","ge1doot" 62 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-150","0xd1a8e61252db6ed86633e536be445c6f4296d875","wuwa" 63 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-151","0x707502591380bcfa98175486a38c15ce90e82097","Artem Verkhovskiy x Andy Shaw" 64 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-152","0x47144372eb383466d18fc91db9cd0396aa6c87a4","Steve Pikelny" 65 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-153","0xc2d9e788980f9183356e5dcad1f7a457eaf8068e","Vamoss" 66 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-154","0x5706542bb1e2ea5a10f820ea9e23aefce4858629","espina" 67 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-155","0x842a0bd434377f771770f2870961bc49742d9435","" 68 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-156","0x65de6475258e3736e5c7d502d2c0a45710c9ec37","r4v3n" 69 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-157","0x35f64560c51c8772f75186a8931929589b7c8d80","Beervangeer" 70 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-158","0xec35bd10c93baad1155390e8bc3452af0b806564","Roman Janajev" 71 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-159","0xdcf4de0cd2bad3579ede845fd5c3442b6c8f9ddc","Monica Rizzolli" 72 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-16","0x7d42611012fdbe366bf4a0481fc0e1abf15e245a","Jeff Davis" 73 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-160","0x842a0bd434377f771770f2870961bc49742d9435","Marcin Ignac" 74 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-161","0xadc1d4b58f8c867be281fd5fb164bf4f6db66c2c","john provencher" 75 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-162","0xecc6043947fd65a7ba93e755d42594c7c7bc2cdb","Generative Artworks" 76 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-163","0xcab81f14a3fc98034a05bab30f8d0e53e978c833","Matt DesLauriers" 77 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-164","0x457ee5f723c7606c12a7264b52e285906f91eea6","Casey REAS" 78 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-165","0x60c38a49a6ee0b33f7ad559ca90800710da90766","Jimmy Herdberg" 79 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-166","0xd0c3339848fb597abd46fa650e3e411715f0bfb8","Shane Rich | raregonzo" 80 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-167","0xf565d79c35758c752d3debfdd380d4eb16a3c6e3","NumbersInMotion" 81 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-168","0x51fff465eafe02c91ac29a65d4071badf1b79543","Blockchance" 82 | 
"0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-169","0x529d928c7debb7a16a291a1ba3a84a4a0dbb5289","toiminto" 83 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-17","0x911463faacb3d0153522e768ee47dc0d6ad5dc5c","Simon De Mai" 84 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-170","0x7d8d846f24ce0d2c69fcf557edb92f4f8f9aebc1","Paweł Dudko" 85 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-171","0x0c192889c5a96fb5a541df829b5233b9df3418e6","Sarah Ridgley" 86 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-172","0x92fb249865ae0d26120031868ba07434674a1600","Aaron Penne x Boreta" 87 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-173","0x9546c0f8260cc1560a0db625ea4eec1a823866ac","Piter Pasma" 88 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-174","0x439f64293716d6778c0c7ffd10e1ebdd33d63672","Ryan Struhl" 89 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-175","0x5e8a9afad6225118ed0f4c1fe944924262fe61c4","ixnayokay" 90 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-176","0xe00712086490734ef8b4d72839a7237e505767f5","Zach Lieberman" 91 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-177","0xe394104f0871b6f1fd46b2de688c3ea6f4cc84dd","Mark Cotton" 92 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-178","0x8bb1a6245603a30eb5b3bf51c369089927979a5f","Ryan Green" 93 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-179","0x01cb023186cab05220554ee75b4d69921dd051f1","Bård Ionson" 94 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-18","0x745cfab7b52a45cdb75bdbcdb6e4562ef25f166b","DCA" 95 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-180","0xe18fc96ba325ef22746ada9a82d521845a2c16f8","hideo" 96 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-181","0x983f10b69c6c8d72539750786911359619df313d","Matto" 97 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-182","0xc7cf7edd6ea7aac57db1929d74a013366cacf0df","last even" 98 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-183","0xf429690d7f1b2ef1dd77c150831f4367e366aeac","Owen Moore" 99 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-184","0x54fcfccdcaabd65e107a33edfc0e83ee2c621ec0","Eltono" 100 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-185","0xf48e3a3bcca259005527f395c4080cd68a80a0fe","LIA" 101 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-186","0x298c75883ffe510dad92e5a0ecca9bcc8d77c013","" 102 | -------------------------------------------------------------------------------- /datasets/art-blocks/data/create_csvs.py: -------------------------------------------------------------------------------- 1 | import json 2 | import csv 3 | from datetime import datetime 4 | import pandas as pd 5 | 6 | PROJECTS_CSV = "projects.csv" 7 | ACCOUNTS_CSV = "accounts.csv" 8 | TOKENS_CSV = "tokens.csv" 9 | SALES_CSV = "sales.csv" 10 | 11 | 12 | def sort_sales(): 13 | df = pd.read_csv(SALES_CSV) 14 | sorted_df = df.sort_values(by=["timestamp"], ascending=True) 15 | sorted_df.to_csv(SALES_CSV, index=False) 16 | 17 | 18 | def main(): 19 | with open('projects_and_sales.json') as f: 20 | data = json.load(f) 21 | 22 | projects = data["data"]["projects"] 23 | 24 | with open(PROJECTS_CSV, 'w') as projects_file: 25 | with open(ACCOUNTS_CSV, 'w') as accounts_file: 26 | with open(TOKENS_CSV, 'w') as tokens_file: 27 | with open(SALES_CSV, 'w') as sales_file: 28 | 29 | projects_header = ['project_id', 'contract_id', 30 | 'project_name', 'active', 'complete', 'locked', 'website'] 31 | accounts_header = ['project_id', 32 | 'account_id', 'account_name'] 33 | tokens_header = ['project_id', 34 | 'owner_id', 'token_id', 'created_at'] 35 | sales_header = ['project_id', 'sale_id', 'token_id', 'seller_id', 36 | 'buyer_id', 'payment_token', 
'price', 'block_number', 'timestamp', 'datetime'] 37 | 38 | projects_writer = csv.DictWriter( 39 | projects_file, quoting=csv.QUOTE_ALL, fieldnames=projects_header) 40 | accounts_writer = csv.DictWriter( 41 | accounts_file, quoting=csv.QUOTE_ALL, fieldnames=accounts_header) 42 | tokens_writer = csv.DictWriter( 43 | tokens_file, quoting=csv.QUOTE_ALL, fieldnames=tokens_header) 44 | sales_writer = csv.DictWriter( 45 | sales_file, quoting=csv.QUOTE_ALL, fieldnames=sales_header) 46 | 47 | projects_writer.writeheader() 48 | accounts_writer.writeheader() 49 | tokens_writer.writeheader() 50 | sales_writer.writeheader() 51 | 52 | for project in projects: 53 | 54 | # all info for projects.csv 55 | project_id = project["id"] 56 | contract_id = project_id.split("-")[0] 57 | project_name = project["name"] 58 | active = project["active"] 59 | complete = project["complete"] 60 | locked = project["locked"] 61 | website = project["website"] 62 | 63 | # add row in projects.csv 64 | projects_writer.writerow({ 65 | 'project_id': project_id, 66 | 'contract_id': contract_id, 67 | 'project_name': project_name, 68 | 'active': active, 69 | 'complete': complete, 70 | 'locked': locked, 71 | 'website': website, 72 | }) 73 | 74 | # all info for accounts.csv 75 | account_id = project["artistAddress"] 76 | account_name = project["artistName"] 77 | 78 | # add row in accounts.csv 79 | accounts_writer.writerow({ 80 | 'project_id': project_id, 81 | 'account_id': account_id, 82 | 'account_name': account_name 83 | }) 84 | 85 | tokens = project["tokens"] 86 | for token in tokens: 87 | # all info for tokens.csv 88 | token_id = token["id"] 89 | owner_id = token["owner"]["id"] 90 | created_at = token["createdAt"] 91 | 92 | # add row in tokens.csv 93 | tokens_writer.writerow({ 94 | 'project_id': project_id, 95 | 'owner_id': owner_id, 96 | 'token_id': token_id, 97 | 'created_at': created_at 98 | }) 99 | 100 | sales = project["openSeaSaleLookupTables"] 101 | for sale in sales: 102 | # all info for sales.csv 103 | sale_id = sale["openSeaSale"]["id"] 104 | seller_id = sale["openSeaSale"]["seller"] 105 | buyer_id = sale["openSeaSale"]["buyer"] 106 | payment_token = sale["openSeaSale"]["paymentToken"] 107 | price = sale["openSeaSale"]["price"] 108 | timestamp = sale["openSeaSale"]["blockTimestamp"] 109 | dt_object = datetime.fromtimestamp(int(timestamp)) 110 | 111 | # there is one token in each sale, and it's first in list of sales -> [0] 112 | sold_token_id = sale["openSeaSale"]["openSeaSaleLookupTables"][0]["token"]["id"] 113 | block_number = sale["openSeaSale"]["blockNumber"] 114 | 115 | # add row in sales.csv 116 | sales_writer.writerow({ 117 | 'project_id': project_id, 118 | 'sale_id': sale_id, 119 | 'token_id': sold_token_id, 120 | 'seller_id': seller_id, 121 | 'buyer_id': buyer_id, 122 | 'payment_token': payment_token, 123 | 'price': price, 124 | 'block_number': block_number, 125 | 'timestamp': timestamp, 126 | 'datetime': dt_object 127 | }) 128 | 129 | sort_sales() 130 | 131 | 132 | if __name__ == "__main__": 133 | main() 134 | -------------------------------------------------------------------------------- /datasets/art-blocks/data/projects.csv: -------------------------------------------------------------------------------- 1 | "project_id","contract_id","project_name","active","complete","locked","website" 2 | "0x059edd72cd353df5106d2b9cc5ab83a52287ac3a-0","0x059edd72cd353df5106d2b9cc5ab83a52287ac3a","Chromie Squiggle","True","False","True","https://www.twitter.com/artonblockchain" 3 | 
"0x059edd72cd353df5106d2b9cc5ab83a52287ac3a-1","0x059edd72cd353df5106d2b9cc5ab83a52287ac3a","Genesis","True","True","True","https://www.instagram.com/dacaldera/" 4 | "0x059edd72cd353df5106d2b9cc5ab83a52287ac3a-2","0x059edd72cd353df5106d2b9cc5ab83a52287ac3a","Construction Token","True","True","True","https://www.jeffgdavis.com/" 5 | "0x28f2d3805652fb5d359486dffb7d08320d403240-0","0x28f2d3805652fb5d359486dffb7d08320d403240","The Family Mooks","True","False","False","" 6 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-10","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","NimBuds","True","True","True","" 7 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-100","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","CENTURY","True","True","False","https://reas.com" 8 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-101","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Enchiridion","True","True","False","https://instagram.com/generativeartworks/" 9 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-102","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","I Saw It in a Dream","True","True","False","https://steviep.xyz/i-saw-it-in-a-dream" 10 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-103","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Octo Garden","True","True","False","https://www.richlord.com/octo-garden" 11 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-104","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Eccentrics","True","True","False","" 12 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-105","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Gizmobotz","True","True","False","https://www.instagram.com/cottonchipper/" 13 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-106","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Radiance","True","True","False","https://www.instagram.com/julienv3ga/" 14 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-107","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Low Tide","True","True","False","https://linktr.ee/LowTideAB" 15 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-108","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Divisions","True","True","False","https://linktr.ee/_mconnolly_" 16 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-109","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Speckled Summits","True","True","False","https://twitter.com/purphat" 17 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-11","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","HyperHash","True","True","True","https://www.beervangeer.nl/hyperhash/" 18 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-110","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Lava Glow","True","True","False","https://twitter.com/jhelf" 19 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-111","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","70s Pop Ghost Bonus Pack 👻","True","True","False","https://70sPop.love" 20 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-112","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Alien Clock","True","True","False","https://www.theblocksofart.com/" 21 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-113","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","celestial cyclones","True","True","False","https://hideocode.art" 22 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-114","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","glitch crystal monsters","True","True","False","https://instagram.com/alidasun" 23 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-115","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Dot Grid","True","True","False","https://www.theelephantnl.art" 24 | 
"0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-116","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Flowers","True","True","False","https://rvig.art/Flowers.html" 25 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-117","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Transitions","True","True","False","https://mattbilfield.com/transitions-on-artblocks" 26 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-118","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","LeWitt Generator Generator","True","True","False","https://chan.gallery" 27 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-119","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Ecumenopolis","True","True","False","https://www.instagram.com/gengeomergence/" 28 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-12","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Unigrids","True","True","True","https://zeblocks.com/" 29 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-120","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Endless Nameless","True","True","False","https://www.newrafael.com/" 30 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-121","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Rinascita","True","True","False","https://stefanocontiero.com/h" 31 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-122","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Cells","True","True","False","https://twitter.com/HeveyArt" 32 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-123","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Nucleus","True","True","False","https://twitter.com/HjalmarAstrom" 33 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-124","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","The Liths of Sisyphus","True","True","False","https://www.instagram.com/_nonfigurativ_/" 34 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-125","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Calendart","True","True","False","https://twitter.com/ssteeenn" 35 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-126","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Timepiece","True","True","False","https://twitter.com/wawaa_studio" 36 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-127","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Labyrometry","True","True","False","https://twitter.com/prettyblocks" 37 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-128","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Scribbled Boundaries","False","False","False","" 38 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-129","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Pigments","True","True","False","https://www.instagram.com/darien.brito/" 39 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-13","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Ringers","True","True","True","https://twitter.com/dmitricherniak" 40 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-130","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Obicera","True","True","False","" 41 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-131","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Scribbled Boundaries","True","True","False","https://scribbled-boundaries.webflow.io" 42 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-132","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Tangled","True","True","False","https://www.instagram.com/superblob/" 43 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-133","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Organized Disruption","True","True","False","https://linktr.ee/praystation" 44 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-134","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Wave Schematics","True","True","False","https://twitter.com/luxpris" 45 | 
"0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-135","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Brushpops","True","True","False","https://supersize.co.il/portfolio/brushpops/" 46 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-136","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","SpiroFlakes","True","True","False","https://linktr.ee/artBoffin" 47 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-137","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Alien Insects","True","True","False","https://www.shvembldr.com/" 48 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-138","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Geometry Runners","True","True","False","https://www.richlord.com/" 49 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-139","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Eccentrics 2: Orbits","True","True","False","https://twitter.com/robdixon" 50 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-14","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Cyber Cities","True","True","False","" 51 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-140","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Good Vibrations","True","True","False","https://twitter.com/IOivm" 52 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-141","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Rapture","True","True","False","https://data-imaginist.com/rapture" 53 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-142","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Unknown Signals","True","True","False","https://twitter.com/_k0ch" 54 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-143","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","phase","True","True","False","https://twitter.com/LorenBednar" 55 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-144","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","autoRAD","True","True","False","http://www.sgtslaughtermelon.com/art" 56 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-145","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Beatboxes","True","True","False","https://zeblocks.com/" 57 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-146","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Neighborhood","True","True","False","https://www.habitat.org/" 58 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-147","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Trossets","True","True","False","https://www.annacarreras.com" 59 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-148","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","ørß1t$","False","False","False","https://twitter.com/berkozdemir" 60 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-149","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Dot Matrix Gradient Study","True","True","False","https://twitter.com/purphat" 61 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-15","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Utopia","True","True","False","https://twitter.com/ge1doot" 62 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-150","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","PrimiLife","True","True","False","https://wuwa.org/primitives" 63 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-151","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","High Tide","True","True","False","https://linktr.ee/LowTideAB" 64 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-152","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Fake Internet Money","True","True","False","https://steviep.xyz/fake-internet-money" 65 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-153","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","We","True","True","False","https://vamoss.com.br" 66 | 
"0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-154","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Warp","True","True","False","" 67 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-155","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Undefined","False","False","False","" 68 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-156","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Moments","True","True","False","https://twitter.com/r4v3n_art" 69 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-157","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","UltraWave 369","True","True","False","http://www.beervangeer.nl" 70 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-158","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","a heart and a soul","True","True","False","https://twitter.com/sirdiekblak" 71 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-159","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Fragments of an Infinite Field","True","True","False","" 72 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-16","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Color Study","True","True","False","https://www.jeffgdavis.com" 73 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-160","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Seadragons","True","True","False","http://marcinignac.com/projects/seadragons/" 74 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-161","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","spawn","True","True","False","https://johnprovencher.com" 75 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-162","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Democracity","True","True","False","https://instagram.com/generativeartworks/" 76 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-163","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Meridian","True","True","False","https://mattdesl.com/" 77 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-164","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Phototaxis","True","True","False","https://reas.com" 78 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-165","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Gravity 16","True","True","False","https://herdberg.com" 79 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-166","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Ouroboros","True","True","False","https://raregonzo.art" 80 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-167","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Blaschke Ballet","True","True","False","https://www.instagram.com/numbersinmotion/" 81 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-168","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Bloom","True","False","False","https://blockchance.io" 82 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-169","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Augmented Sequence","True","True","False","https://toiminto.swaeg.net" 83 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-17","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Spectron","True","True","True","https://spectron.netlify.app" 84 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-170","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Chroma Theory","True","False","False","https://www.pdudko.com/p/chroma-theory.html" 85 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-171","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Himinn","True","True","False","https://twitter.com/sarah_ridgley" 86 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-172","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Rituals - Venice","True","True","False","https://www.ritualsirl.com/" 87 | 
"0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-173","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Skulptuur","True","True","False","https://piterpasma.nl/skulptuur" 88 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-174","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Letters to My Future Self","True","True","False","https://www.instagram.com/rwstruhl/" 89 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-175","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","mono no aware","True","False","False","http://ixnayokay.art" 90 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-176","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Color Gradient Studies","False","False","False","https://www.instagram.com/zach.lieberman/" 91 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-177","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Space Birds","True","False","False","https://medium.com/@markcotton_81658/inspiration-for-and-development-of-space-birds-on-artblocks-io-560af0d1a6a9" 92 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-178","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Beauty in the Hurting","True","True","False","https://linktr.ee/ryangreen8" 93 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-179","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","8","True","False","False","https://collect.bardionson.com/eight" 94 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-18","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Gen 2","True","True","False","" 95 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-180","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","mecha suits","True","True","False","https://hideocode.art/" 96 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-181","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","FOCUS","True","False","False","https://zenerative.com/focus/" 97 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-182","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Amoeba","True","False","False","https://lasteven.xyz" 98 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-183","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Quarantine","True","False","False","https://www.owenmoore.art/artwork/quarantine" 99 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-184","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Swing","False","False","False","https://www.eltono.com/lab/projects/swing/" 100 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-185","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","little boxes on the hillsides, child","True","False","False","https://liaworks.com" 101 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-186","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","THE SOURCE CoDE","False","False","False","" 102 | -------------------------------------------------------------------------------- /datasets/art-blocks/produce.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import stream.producer as producer 3 | 4 | DATA = "data/sales.csv" 5 | 6 | 7 | def generate(): 8 | while True: 9 | with open(DATA) as file: 10 | file.readline() 11 | for line in file.readlines(): 12 | line_list = line.strip().split(",") 13 | line_json = { 14 | 'project_id': line_list[0], 15 | 'sale_id': line_list[1], 16 | 'token_id': line_list[2], 17 | 'seller_id': line_list[3], 18 | 'buyer_id': line_list[4], 19 | 'payment_token': line_list[5], 20 | 'price': line_list[6], 21 | 'block_number': line_list[7], 22 | 'datetime': line_list[9] 23 | } 24 | yield line_json 25 | 26 | 27 | def main(): 28 | producer.run(generate) 29 | 30 | 31 | if __name__ == "__main__": 32 | main() 33 | 
-------------------------------------------------------------------------------- /datasets/art-blocks/requirements.txt: -------------------------------------------------------------------------------- 1 | kafka-python==2.0.2 2 | pika==1.2.0 3 | pulsar-client==2.10.0 4 | -------------------------------------------------------------------------------- /datasets/github/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | # Install packages 4 | COPY /datasets/github/requirements.txt ./ 5 | RUN pip3 install -r requirements.txt 6 | 7 | COPY /datasets/github/ /app/ 8 | COPY /stream /app/stream/ 9 | WORKDIR /app 10 | -------------------------------------------------------------------------------- /datasets/github/README.md: -------------------------------------------------------------------------------- 1 |

# :bar_chart: GitHub Commits Streaming :bar_chart:

2 | 3 | ## :speech_balloon: About 4 | 5 | GitHub is a provider of Internet hosting for software development and version 6 | control using Git. It offers the distributed version control and source code 7 | management (SCM) functionality of Git, plus its own features. It provides access 8 | control and several collaboration features such as bug tracking, feature 9 | requests, task management, continuous integration and wikis for every project. 10 | 11 | ## :open_file_folder: Dataset 12 | 13 | File `data/github-network.csv` was obtained using the custom GitHub scraper in 14 | [/scraper](./scraper). To create the CSV file, change into the `scraper` 15 | directory and run: 16 | 17 | ``` 18 | python3 scraper.py 19 | ``` 20 | 21 | ## :fast_forward: Streaming commits 22 | 23 | Check the instructions in the root [README](../../README.md). 24 | 25 | ## :scroll: References 26 | 27 | [1] Learn about GitHub at their 28 | [website](https://www.github.com/). 29 | -------------------------------------------------------------------------------- /datasets/github/produce.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import csv 3 | import stream.producer as producer 4 | 5 | DATA = "data/github-network.csv" 6 | 7 | 8 | def generate(): 9 | while True: 10 | with open(DATA) as file: 11 | csvReader = csv.DictReader(file) 12 | for rows in csvReader: 13 | data = { 14 | 'commit': rows['commit'], 15 | 'author': rows['author'], 16 | 'followers': ast.literal_eval(rows['followers']), 17 | 'following': ast.literal_eval(rows['following']), 18 | } 19 | yield data 20 | 21 | 22 | def main(): 23 | producer.run(generate) 24 | 25 | 26 | if __name__ == "__main__": 27 | main() 28 | -------------------------------------------------------------------------------- /datasets/github/requirements.txt: -------------------------------------------------------------------------------- 1 | kafka-python==2.0.2 2 | pika==1.2.0 3 | pulsar-client==2.10.0 4 | -------------------------------------------------------------------------------- /datasets/github/scraper/dependency_graph.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | 4 | 5 | dependents_list = [] 6 | 7 | 8 | def get_dependents(repo): 9 | max_pages = 3000 10 | url = 'https://github.com/{}/network/dependents'.format(repo) 11 | 12 | for _ in range(max_pages): 13 | print("GET " + url) 14 | r = requests.get(url) 15 | print(r) 16 | soup = BeautifulSoup(r.content, "html.parser") 17 | 18 | dependents_exist = soup.find('h3', {"data-view-component": "true"}) 19 | if (dependents_exist and dependents_exist.text == "We haven’t found any dependents for this repository yet."): 20 | return [] 21 | 22 | data = [ 23 | "{}/{}".format( 24 | t.find('a', {"data-repository-hovercards-enabled": ""}).text, 25 | t.find('a', {"data-hovercard-type": "repository"}).text 26 | ) 27 | for t in soup.findAll("div", {"class": "Box-row"}) 28 | ] 29 | dependents_list.extend(data) 30 | 31 | next_url = soup.find( 32 | "div", {"class": "paginate-container"}) 33 | next_disabled = soup.find( 34 | "button", {"disabled": "disabled"}) 35 | if (not next_url or next_disabled): 36 | return dependents_list 37 | 38 | url = next_url.find('a')["href"] 39 | return dependents_list 40 | 41 | 42 | def main(): 43 | dependents = get_dependents("memgraph/pymgclient") 44 | print(len(dependents)) 45 | 46 | 47 | if __name__ == "__main__": 48 | main() 49 | 
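`get_dependents()` above walks GitHub's dependents pages with BeautifulSoup until the paginator runs out (or the 3000-page cap is hit), so it needs no API token, but large dependency networks crawl slowly and are subject to GitHub's rate limits. The full scraper in `scraper.py` below does need a token for the GitHub API; a typical invocation, assuming a valid personal access token, looks like:

```
python3 scraper.py --token <YOUR_GITHUB_TOKEN> --repo networkx/networkx --csv-output ../data/github-network.csv
```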
-------------------------------------------------------------------------------- /datasets/github/scraper/requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.10.0 2 | # scraper.py additionally imports these packages (left unpinned here): 3 | PyGithub 4 | requests 5 | tqdm -------------------------------------------------------------------------------- /datasets/github/scraper/scraper.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | from github import Github 3 | from tqdm import tqdm 4 | import csv 5 | import dependency_graph 6 | 7 | 8 | def parse_args(): 9 | """ 10 | Parse command line arguments. 11 | """ 12 | parser = ArgumentParser(description=__doc__) 13 | parser.add_argument("--token", default="", type=str, help="GitHub personal access token.") 14 | parser.add_argument("--repo", default="networkx/networkx", 15 | type=str, help="Root repository for the network.") 16 | parser.add_argument( 17 | "--csv-output", 18 | default="../data/github-network.csv", 19 | help="Name of the CSV file.", 20 | ) 21 | parser.add_argument( 22 | "--all", 23 | default=True, 24 | action="store_true", 25 | help="Generate the whole network." 26 | ) 27 | parser.add_argument("--dependents", default=False, action="store_true", 28 | help="Include dependents in the network.") 29 | parser.add_argument("--contributors", default=False, action="store_true", 30 | help="Include contributors in the network.") 31 | parser.add_argument("--followers", default=True, action="store_true", 32 | help="Include followers in the network.") 33 | parser.add_argument("--following", default=True, action="store_true", 34 | help="Include following in the network.") 35 | 36 | return parser.parse_args() 37 | 38 | 39 | args = parse_args() 40 | g = Github(args.token) 41 | 42 | 43 | def get_contributors(repo): 44 | contributors = repo.get_contributors() 45 | number_of_contributors = contributors.totalCount 46 | print('Number of contributors:', number_of_contributors) 47 | return contributors 48 | 49 | 50 | def get_followers(author): 51 | followers = author.get_followers() 52 | followers_names = [] 53 | for follower in followers: 54 | followers_names.append(follower.login) 55 | return followers_names 56 | 57 | 58 | def get_following(author): 59 | following = author.get_following() 60 | following_names = [] 61 | for follows in following: 62 | following_names.append(follows.login) 63 | return following_names 64 | 65 | 66 | def get_commits(repo): 67 | commits = repo.get_commits() 68 | number_of_commits = commits.totalCount 69 | print('Number of commits:', number_of_commits) 70 | return commits 71 | 72 | 73 | def get_dependents(repo_name): 74 | dependents = dependency_graph.get_dependents(repo_name) 75 | number_of_dependents = len(dependents) 76 | print('Number of dependents:', number_of_dependents) 77 | return dependents 78 | 79 | 80 | def scrape_data(repositories): 81 | for repo in repositories: 82 | repo_name = repo.full_name 83 | print(repo_name) 84 | 85 | if (args.dependents): 86 | dependents = get_dependents(repo_name) 87 | 88 | if (args.contributors): 89 | contributors = get_contributors(repo) 90 | 91 | fieldnames = ['commit', 'author', 'followers', 'following'] 92 | with open(args.csv_output, 'w', encoding='UTF8', newline='') as f: 93 | writer = csv.DictWriter(f, fieldnames=fieldnames) 94 | writer.writeheader() 95 | commits = get_commits(repo) 96 | for commit in tqdm(commits): 97 | author = commit.author 98 | print(author) 99 | if not author: 100 | continue 101 | followers = [] 102 | if (args.all or 
args.followers): 103 | followers = get_followers(author) 104 | 105 | following = [] 106 | if (args.all or args.following): 107 | following = get_following(author) 108 | 109 | writer.writerow({'commit': commit.sha, 110 | 'author': author.login, 111 | 'followers': followers, 112 | 'following': following}) 113 | 114 | 115 | def main(): 116 | repositories = [] 117 | #repositories = g.search_repositories("q=language:python", "stars", "desc") 118 | repositories.append(g.get_repo(args.repo)) 119 | scrape_data(repositories) 120 | 121 | 122 | if __name__ == "__main__": 123 | main() 124 | -------------------------------------------------------------------------------- /datasets/movielens/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | # Install packages 4 | COPY /datasets/movielens/requirements.txt ./ 5 | RUN pip3 install -r requirements.txt 6 | 7 | COPY /datasets/movielens/ /app/ 8 | COPY /stream /app/stream/ 9 | WORKDIR /app 10 | -------------------------------------------------------------------------------- /datasets/movielens/README.md: -------------------------------------------------------------------------------- 1 |

# :bar_chart: Movie Ratings Stream :bar_chart:

2 | 3 | ## :speech_balloon: About 4 | 5 | This dataset contains movie ratings by users. 6 | 7 | ## :open_file_folder: Dataset 8 | 9 | The files `data/movies.csv` and `data/ratings.csv` were obtained through the 10 | site **GroupLens** [[1]](#1). 11 | 12 | ## :fast_forward: Streaming movie ratings 13 | 14 | Check the instructions in the root [README](../../README.md). 15 | 16 | ## :scroll: References 17 | 18 | [1] [MovieLens 19 | dataset](https://grouplens.org/datasets/movielens/) 20 | -------------------------------------------------------------------------------- /datasets/movielens/produce.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import stream.producer as producer 3 | 4 | DATA_RATINGS = "data/ratings.csv" 5 | DATA_MOVIES = "data/movies.csv" 6 | movies_dict = {} 7 | 8 | 9 | def generate(): 10 | while True: 11 | with open(DATA_RATINGS) as file: 12 | csvReader = csv.DictReader(file) 13 | for rows in csvReader: 14 | data = { 15 | 'userId': rows['userId'], 16 | 'movie': movies_dict[rows['movieId']], 17 | 'rating': rows['rating'], 18 | 'timestamp': rows['timestamp'], 19 | } 20 | yield data 21 | 22 | 23 | def main(): 24 | with open(DATA_MOVIES) as file: 25 | csvReader = csv.DictReader(file) 26 | for rows in csvReader: 27 | movieId = rows['movieId'] 28 | movies_dict[movieId] = { 29 | 'movieId': movieId, 30 | 'title': rows['title'], 31 | 'genres': rows['genres'].split('|') 32 | } 33 | producer.run(generate) 34 | 35 | 36 | if __name__ == "__main__": 37 | main() 38 | -------------------------------------------------------------------------------- /datasets/movielens/requirements.txt: -------------------------------------------------------------------------------- 1 | kafka-python==2.0.2 2 | pika==1.2.0 3 | pulsar-client==2.10.0 4 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | networks: 4 | app-tier: 5 | driver: bridge 6 | 7 | services: 8 | memgraph-mage: 9 | build: ./memgraph 10 | ports: 11 | - '7687:7687' 12 | - '7444:7444' 13 | volumes: 14 | - ./memgraph/import-data:/usr/lib/memgraph/import-data 15 | entrypoint: [ 16 | "/usr/lib/memgraph/memgraph", 17 | "--telemetry-enabled=false", 18 | "--query-modules-directory=/transformations,/usr/lib/memgraph/query_modules", 19 | "--log-level=TRACE"] 20 | networks: 21 | - app-tier 22 | 23 | zookeeper: 24 | image: confluentinc/cp-zookeeper:7.1.1 25 | ports: 26 | - "2181:2181" 27 | environment: 28 | ZOOKEEPER_CLIENT_PORT: 2181 29 | ZOOKEEPER_SERVER_ID: 1 30 | ZOOKEEPER_SERVERS: zookeeper:2888:3888 31 | networks: 32 | - app-tier 33 | 34 | kafka: 35 | build: ./kafka 36 | container_name: kafka 37 | ports: 38 | - "9092:9092" 39 | - "9093:9093" 40 | - "9999:9999" 41 | environment: 42 | KAFKA_ADVERTISED_LISTENERS: LISTENER_DOCKER_INTERNAL://kafka:9092,LISTENER_DOCKER_EXTERNAL://${DOCKER_HOST_IP:-127.0.0.1}:9093 43 | KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: LISTENER_DOCKER_INTERNAL:SASL_PLAINTEXT,LISTENER_DOCKER_EXTERNAL:SASL_PLAINTEXT 44 | KAFKA_INTER_BROKER_LISTENER_NAME: LISTENER_DOCKER_INTERNAL 45 | KAFKA_ZOOKEEPER_CONNECT: "zookeeper:2181" 46 | KAFKA_BROKER_ID: 1 47 | KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" 48 | KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 49 | KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 50 | KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 51 | 
KAFKA_JMX_PORT: 9999 52 | KAFKA_JMX_HOSTNAME: ${DOCKER_HOST_IP:-127.0.0.1} 53 | KAFKA_AUTHORIZER_CLASS_NAME: kafka.security.authorizer.AclAuthorizer 54 | KAFKA_ALLOW_EVERYONE_IF_NO_ACL_FOUND: "true" 55 | KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/kafka_server_jaas.conf" 56 | KAFKA_SASL_ENABLED_MECHANISMS: PLAIN 57 | KAFKA_SASL_MECHANISM_INTER_BROKER_PROTOCOL: PLAIN 58 | ZOOKEEPER_SASL_ENABLED: "false" 59 | volumes: 60 | - ./kafka/kafka_server_jaas.conf:/etc/kafka/kafka_server_jaas.conf 61 | - ./kafka/connect.properties:/etc/kafka/connect.properties 62 | command: sh -c "/etc/confluent/docker/run" 63 | depends_on: 64 | - zookeeper 65 | networks: 66 | - app-tier 67 | 68 | redpanda: 69 | command: 70 | - redpanda 71 | - start 72 | - --smp 73 | - "1" 74 | - --reserve-memory 75 | - 0M 76 | - --overprovisioned 77 | - --node-id 78 | - "0" 79 | - --kafka-addr 80 | - PLAINTEXT://0.0.0.0:29092,OUTSIDE://0.0.0.0:9094 81 | - --advertise-kafka-addr 82 | - PLAINTEXT://redpanda:29092,OUTSIDE://0.0.0.0:9094 83 | image: docker.vectorized.io/vectorized/redpanda:v21.9.5 84 | ports: 85 | - 9094:9094 86 | - 29092:29092 87 | networks: 88 | - app-tier 89 | 90 | rabbitmq: 91 | image: rabbitmq:3-management-alpine 92 | ports: 93 | - 5672:5672 94 | - 15672:15672 95 | networks: 96 | - app-tier 97 | 98 | pulsar: 99 | image: apachepulsar/pulsar:2.6.0 100 | ports: 101 | - 8080:8080 102 | - 6650:6650 103 | environment: 104 | PULSAR_MEM: " -Xms512m -Xmx512m -XX:MaxDirectMemorySize=1g" 105 | command: bin/pulsar standalone 106 | networks: 107 | - app-tier 108 | 109 | core: 110 | image: tianon/true 111 | restart: "no" 112 | depends_on: 113 | - kafka 114 | - redpanda 115 | - pulsar 116 | - rabbitmq 117 | 118 | art-blocks: 119 | build: 120 | context: ./ 121 | dockerfile: ./datasets/art-blocks/Dockerfile 122 | entrypoint: [ "python3", "produce.py", "--stream-delay", "1.0", "--consumer" ] 123 | env_file: platform_variables.env 124 | environment: 125 | KAFKA_TOPIC: "sales" 126 | REDPANDA_TOPIC: "sales" 127 | RABBITMQ_QUEUE: "sales" 128 | PULSAR_TOPIC: "sales" 129 | networks: 130 | - app-tier 131 | 132 | github: 133 | build: 134 | context: ./ 135 | dockerfile: ./datasets/github/Dockerfile 136 | entrypoint: [ "python3", "produce.py", "--stream-delay", "1.0", "--consumer" ] 137 | env_file: platform_variables.env 138 | environment: 139 | KAFKA_TOPIC: "github" 140 | REDPANDA_TOPIC: "github" 141 | RABBITMQ_QUEUE: "github" 142 | PULSAR_TOPIC: "github" 143 | networks: 144 | - app-tier 145 | 146 | movielens: 147 | build: 148 | context: ./ 149 | dockerfile: ./datasets/movielens/Dockerfile 150 | entrypoint: [ "python3", "produce.py", "--stream-delay", "1.0", "--consumer" ] 151 | env_file: 152 | platform_variables.env 153 | environment: 154 | KAFKA_TOPIC: "ratings" 155 | REDPANDA_TOPIC: "ratings" 156 | RABBITMQ_QUEUE: "ratings" 157 | PULSAR_TOPIC: "ratings" 158 | networks: 159 | - app-tier 160 | 161 | amazon-books: 162 | build: 163 | context: ./ 164 | dockerfile: ./datasets/amazon-books/Dockerfile 165 | entrypoint: [ "python3", "produce.py", "--stream-delay", "1.0", "--consumer" ] 166 | env_file: 167 | platform_variables.env 168 | environment: 169 | KAFKA_TOPIC: "book-ratings" 170 | REDPANDA_TOPIC: "book-ratings" 171 | RABBITMQ_QUEUE: "book-ratings" 172 | PULSAR_TOPIC: "book-ratings" 173 | networks: 174 | - app-tier 175 | 176 | 177 | art-blocks-analysis: 178 | build: 179 | context: ./ 180 | dockerfile: ./data-analysis/Dockerfile 181 | entrypoint: [ "python3", "art-blocks-analysis.py" ] 182 | env_file: platform_variables.env 183 
| environment: 184 | KAFKA_TOPIC: "sales" 185 | REDPANDA_TOPIC: "sales" 186 | RABBITMQ_QUEUE: "sales" 187 | PULSAR_TOPIC: "sales" 188 | networks: 189 | - app-tier 190 | 191 | art-blocks-memgraph: 192 | build: 193 | context: ./ 194 | dockerfile: ./data-analysis/Dockerfile 195 | entrypoint: [ "python3", "art-blocks-memgraph.py" ] 196 | environment: 197 | MEMGRAPH_IP: memgraph-mage 198 | MEMGRAPH_PORT: "7687" 199 | depends_on: 200 | - memgraph-mage 201 | networks: 202 | - app-tier 203 | 204 | github-analysis: 205 | build: 206 | context: ./ 207 | dockerfile: ./data-analysis/Dockerfile 208 | entrypoint: [ "python3", "github-analysis.py" ] 209 | env_file: platform_variables.env 210 | environment: 211 | KAFKA_TOPIC: "github" 212 | REDPANDA_TOPIC: "github" 213 | RABBITMQ_QUEUE: "github" 214 | PULSAR_TOPIC: "github" 215 | networks: 216 | - app-tier 217 | 218 | movielens-memgraph: 219 | build: 220 | context: ./ 221 | dockerfile: ./data-analysis/Dockerfile 222 | entrypoint: [ "python3", "movielens-memgraph.py" ] 223 | environment: 224 | MEMGRAPH_IP: memgraph-mage 225 | MEMGRAPH_PORT: "7687" 226 | depends_on: 227 | - memgraph-mage 228 | networks: 229 | - app-tier 230 | 231 | amazon-books-memgraph: 232 | build: 233 | context: ./ 234 | dockerfile: ./data-analysis/Dockerfile 235 | entrypoint: [ "python3", "amazon-books-memgraph.py" ] 236 | environment: 237 | MEMGRAPH_IP: memgraph-mage 238 | MEMGRAPH_PORT: "7687" 239 | depends_on: 240 | - memgraph-mage 241 | networks: 242 | - app-tier 243 | 244 | github-commits-memgraph: 245 | build: 246 | context: ./ 247 | dockerfile: ./data-analysis/Dockerfile 248 | entrypoint: [ "python3", "github-commits-memgraph.py" ] 249 | environment: 250 | MEMGRAPH_IP: memgraph-mage 251 | MEMGRAPH_PORT: "7687" 252 | depends_on: 253 | - memgraph-mage 254 | networks: 255 | - app-tier 256 | -------------------------------------------------------------------------------- /kafka/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM confluentinc/cp-kafka:7.1.1 2 | 3 | COPY init.sh /usr/local/bin/ 4 | ENTRYPOINT [] -------------------------------------------------------------------------------- /kafka/connect.properties: -------------------------------------------------------------------------------- 1 | security.protocol=SASL_PLAINTEXT 2 | sasl.mechanism=PLAIN 3 | sasl.jaas.config=org.apache.kafka.common.security.plain.PlainLoginModule required \ 4 | username="admin" \ 5 | password="admin"; 6 | -------------------------------------------------------------------------------- /kafka/init.sh: -------------------------------------------------------------------------------- 1 | kafka-acls --bootstrap-server localhost:9092 --command-config /etc/kafka/connect.properties --add --allow-principal User:public --operation READ --topic ratings 2 | kafka-acls --bootstrap-server localhost:9092 --command-config /etc/kafka/connect.properties --add --allow-principal User:admin --operation WRITE --topic ratings 3 | -------------------------------------------------------------------------------- /kafka/kafka_server_jaas.conf: -------------------------------------------------------------------------------- 1 | KafkaServer { 2 | org.apache.kafka.common.security.plain.PlainLoginModule required 3 | username="admin" 4 | password="admin" 5 | user_admin="admin" 6 | user_public="public" 7 | security.protocol=SASL_PLAINTEXT 8 | sasl.mechanism=PLAIN; 9 | }; 10 | Client{}; -------------------------------------------------------------------------------- 
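kafka_server_jaas.conf above defines two SASL/PLAIN identities for the broker (`admin`/`admin` and `public`/`public`, via the `user_<name>` entries), and init.sh grants `User:public` READ and `User:admin` WRITE on the `ratings` topic. A minimal sketch of a client consuming as the read-only `public` user, assuming the LISTENER_DOCKER_EXTERNAL listener on localhost:9093 from docker-compose.yml:

```
# A hypothetical standalone consumer, not part of this repository. It
# authenticates over SASL/PLAIN as the "public" user defined in
# kafka_server_jaas.conf and reads the "ratings" topic, which init.sh
# explicitly allows for User:public.
import json

from kafka import KafkaConsumer  # kafka-python

consumer = KafkaConsumer(
    "ratings",
    bootstrap_servers="localhost:9093",
    security_protocol="SASL_PLAINTEXT",
    sasl_mechanism="PLAIN",
    sasl_plain_username="public",
    sasl_plain_password="public",
    value_deserializer=lambda payload: json.loads(payload.decode("utf8")),
)

for message in consumer:
    print(message.value)
```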
/memgraph/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM memgraph/memgraph-mage:1.3 2 | 3 | # Copy the local query modules and transformations 4 | COPY transformations/ /transformations 5 | COPY query_modules/ /usr/lib/memgraph/query_modules 6 | -------------------------------------------------------------------------------- /memgraph/import-data/accounts.csv: -------------------------------------------------------------------------------- 1 | "project_id","account_id","account_name" 2 | "0x059edd72cd353df5106d2b9cc5ab83a52287ac3a-0","0xb998a2520907ed1fc0f9f457b2219fb2720466cd","Snowfro" 3 | "0x059edd72cd353df5106d2b9cc5ab83a52287ac3a-1","0xa9da6d2b707674a1cf5c3fbdee94c903b030d4e3","DCA" 4 | "0x059edd72cd353df5106d2b9cc5ab83a52287ac3a-2","0x7d42611012fdbe366bf4a0481fc0e1abf15e245a","Jeff Davis" 5 | "0x28f2d3805652fb5d359486dffb7d08320d403240-0","0x2bc66765dce0e3f4878d78a8cc50cfcb9563b8ec","" 6 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-10","0x1e8e749b2b578e181ca01962e9448006772b24a2","Bryan Brinkman" 7 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-100","0x457ee5f723c7606c12a7264b52e285906f91eea6","Casey REAS" 8 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-101","0xecc6043947fd65a7ba93e755d42594c7c7bc2cdb","Generative Artworks" 9 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-102","0x47144372eb383466d18fc91db9cd0396aa6c87a4","Steve Pikelny" 10 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-103","0x9eef6bcbff2b8a77869597842b09ac9d401811b5","Rich Lord" 11 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-104","0x7e6d4810ea233d7588e3675d704571e29c4bcbba","Radix" 12 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-105","0xe394104f0871b6f1fd46b2de688c3ea6f4cc84dd","Mark Cotton" 13 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-106","0x9e192409efff9300432d089e9e3a6183cc26e5c0","Julien Gachadoat" 14 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-107","0x707502591380bcfa98175486a38c15ce90e82097","Artem Verkhovskiy x Andy Shaw" 15 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-108","0xf92bb2215684c353b4009395061ee7652883c365","Michael Connolly" 16 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-109","0xd6dd6961d3224958fcd306b76a991ab974ec1ebc","Jake Rockland" 17 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-11","0x35f64560c51c8772f75186a8931929589b7c8d80","Beervangeer" 18 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-110","0xaccbee638fedfe3650be1fa3182b428483db8369","JEANVASCRIPT" 19 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-111","0x29b2f895343cadfb3f5101bef6484b1f01c83dc9","Daniel Catt" 20 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-112","0x4666fd1f89576e2d6fb3f2ecec5eefd3e1ba6b59","Shvembldr" 21 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-113","0xe18fc96ba325ef22746ada9a82d521845a2c16f8","hideo" 22 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-114","0xe88046be7445f9c21a3062131c166b45fb156110","Alida Sun" 23 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-115","0x07911f74c5ef0ce80c57ebbf52033774055baa0c","TheElephantNL" 24 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-116","0x44a1e2883f1e599664e511e6c1c7cc72d846f5fc","RVig" 25 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-117","0x87f669c0ee22c42be261dd74143e716748ba11ba","Jason Ting x Matt Bilfield" 26 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-118","0x4bf3805b23c99f8e0a5797e86fd0232a04a2a629","Mitchell F. 
Chan" 27 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-119","0x129eb023b2f879b4c7dc4b19e7877bda35789773","Joshua Bagley" 28 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-12","0xb033daedca113b0386eb3e8f4c72c79fc50ae32e","Zeblocks" 29 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-120","0x0f441cfad93287109f5ef834bf52f4aaaa8d8ffa","Rafaël Rozendaal" 30 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-121","0x161b79d4e135693361cb42b6a3e8067c8c34e744","Stefano Contiero" 31 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-122","0x93f7cb21d6904492b33e0df24008c8b13ce64380","Hevey" 32 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-123","0xfa0bf8ed3b94033129e061c968b3ec290c1d9e33","Hjalmar Åström" 33 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-124","0xda8457bcc1096b4c66316b0a40c165d681bf244c","nonfigurativ" 34 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-125","0xf359de2378bf25373a33a64e1f9b257673e3320c","steen & n-e-o" 35 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-126","0x65bd6a518c0d58d314034d519ce69b3e05a806e4","WAWAA" 36 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-127","0x51582e2c703b0d7c745c6a4ae0336c98c3c41802","Eliya Stein" 37 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-128","0xb783cd9f3e74d52b320904292e0fbe720d333d97","" 38 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-129","0xe452517f920950b5977bdc0387bedbe5253954c2","Darien Brito" 39 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-13","0xe0753cfcabb86c2828b79a3ddd4faf6af0db0eb4","Dmitri Cherniak" 40 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-130","0x2776621ff536af829919ab6cba8db434aeba43f9","Alexis André" 41 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-131","0xb783cd9f3e74d52b320904292e0fbe720d333d97","William Tan" 42 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-132","0xac80dc4112f7757c05d65c773e0803ae8af7b834","Superblob" 43 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-133","0xfbc78f494ad61d90f02a3258e527de1321095acb","Joshua Davis / PrayStation" 44 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-134","0xbd3527f0c0f6bd513f0a1560fc0108a291c82806","luxpris" 45 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-135","0x12c0a19094a79feb81ee74501e67e3215b53b7dc","Matty Mariansky" 46 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-136","0xb2a2d7eee0f6d9e0465d18e3ebdc7a3a78612cc0","Alexander Reben" 47 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-137","0x4666fd1f89576e2d6fb3f2ecec5eefd3e1ba6b59","Shvembldr" 48 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-138","0x9eef6bcbff2b8a77869597842b09ac9d401811b5","Rich Lord" 49 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-139","0x7e6d4810ea233d7588e3675d704571e29c4bcbba","Radix" 50 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-14","0x5f127b4323c0061768976ad34a1a2beb9db19886","pxlq" 51 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-140","0x1f5743df7c907a74c8cc28fe0e27c575830ac6a6","Aluan Wang" 52 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-141","0x5a77b2d05afb67eacd8f9c7e98692dd1e2883cb3","Thomas Lin Pedersen" 53 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-142","0x9441295f5a5f77c090ae106f6724510f07fc4bca","k0ch" 54 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-143","0x7f5a0a6847fd0fa05c13cbc02f435047b429e37c","Loren Bednar" 55 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-144","0xa3e51498579db0f7bb1ec9e3093b2f44158e25a5","sgt_slaughtermelon & Tartaria Archivist" 56 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-145","0xb033daedca113b0386eb3e8f4c72c79fc50ae32e","Zeblocks" 57 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-146","0x7d42611012fdbe366bf4a0481fc0e1abf15e245a","Jeff Davis" 58 | 
"0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-147","0x32f848b9436f6400e5fa1fd46e9b96f4541c0966","Anna Carreras" 59 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-148","0xc5e08104c19dafd00fe40737490da9552db5bfe5","berk" 60 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-149","0xd6dd6961d3224958fcd306b76a991ab974ec1ebc","Jake Rockland" 61 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-15","0x3f870d006185cb649c3261013fd86cc89b762f1e","ge1doot" 62 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-150","0xd1a8e61252db6ed86633e536be445c6f4296d875","wuwa" 63 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-151","0x707502591380bcfa98175486a38c15ce90e82097","Artem Verkhovskiy x Andy Shaw" 64 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-152","0x47144372eb383466d18fc91db9cd0396aa6c87a4","Steve Pikelny" 65 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-153","0xc2d9e788980f9183356e5dcad1f7a457eaf8068e","Vamoss" 66 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-154","0x5706542bb1e2ea5a10f820ea9e23aefce4858629","espina" 67 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-155","0x842a0bd434377f771770f2870961bc49742d9435","" 68 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-156","0x65de6475258e3736e5c7d502d2c0a45710c9ec37","r4v3n" 69 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-157","0x35f64560c51c8772f75186a8931929589b7c8d80","Beervangeer" 70 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-158","0xec35bd10c93baad1155390e8bc3452af0b806564","Roman Janajev" 71 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-159","0xdcf4de0cd2bad3579ede845fd5c3442b6c8f9ddc","Monica Rizzolli" 72 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-16","0x7d42611012fdbe366bf4a0481fc0e1abf15e245a","Jeff Davis" 73 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-160","0x842a0bd434377f771770f2870961bc49742d9435","Marcin Ignac" 74 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-161","0xadc1d4b58f8c867be281fd5fb164bf4f6db66c2c","john provencher" 75 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-162","0xecc6043947fd65a7ba93e755d42594c7c7bc2cdb","Generative Artworks" 76 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-163","0xcab81f14a3fc98034a05bab30f8d0e53e978c833","Matt DesLauriers" 77 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-164","0x457ee5f723c7606c12a7264b52e285906f91eea6","Casey REAS" 78 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-165","0x60c38a49a6ee0b33f7ad559ca90800710da90766","Jimmy Herdberg" 79 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-166","0xd0c3339848fb597abd46fa650e3e411715f0bfb8","Shane Rich | raregonzo" 80 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-167","0xf565d79c35758c752d3debfdd380d4eb16a3c6e3","NumbersInMotion" 81 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-168","0x51fff465eafe02c91ac29a65d4071badf1b79543","Blockchance" 82 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-169","0x529d928c7debb7a16a291a1ba3a84a4a0dbb5289","toiminto" 83 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-17","0x911463faacb3d0153522e768ee47dc0d6ad5dc5c","Simon De Mai" 84 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-170","0x7d8d846f24ce0d2c69fcf557edb92f4f8f9aebc1","Paweł Dudko" 85 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-171","0x0c192889c5a96fb5a541df829b5233b9df3418e6","Sarah Ridgley" 86 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-172","0x92fb249865ae0d26120031868ba07434674a1600","Aaron Penne x Boreta" 87 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-173","0x9546c0f8260cc1560a0db625ea4eec1a823866ac","Piter Pasma" 88 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-174","0x439f64293716d6778c0c7ffd10e1ebdd33d63672","Ryan Struhl" 89 | 
"0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-175","0x5e8a9afad6225118ed0f4c1fe944924262fe61c4","ixnayokay" 90 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-176","0xe00712086490734ef8b4d72839a7237e505767f5","Zach Lieberman" 91 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-177","0xe394104f0871b6f1fd46b2de688c3ea6f4cc84dd","Mark Cotton" 92 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-178","0x8bb1a6245603a30eb5b3bf51c369089927979a5f","Ryan Green" 93 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-179","0x01cb023186cab05220554ee75b4d69921dd051f1","Bård Ionson" 94 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-18","0x745cfab7b52a45cdb75bdbcdb6e4562ef25f166b","DCA" 95 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-180","0xe18fc96ba325ef22746ada9a82d521845a2c16f8","hideo" 96 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-181","0x983f10b69c6c8d72539750786911359619df313d","Matto" 97 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-182","0xc7cf7edd6ea7aac57db1929d74a013366cacf0df","last even" 98 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-183","0xf429690d7f1b2ef1dd77c150831f4367e366aeac","Owen Moore" 99 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-184","0x54fcfccdcaabd65e107a33edfc0e83ee2c621ec0","Eltono" 100 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-185","0xf48e3a3bcca259005527f395c4080cd68a80a0fe","LIA" 101 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-186","0x298c75883ffe510dad92e5a0ecca9bcc8d77c013","" 102 | -------------------------------------------------------------------------------- /memgraph/import-data/projects.csv: -------------------------------------------------------------------------------- 1 | "project_id","contract_id","project_name","active","complete","locked","website" 2 | "0x059edd72cd353df5106d2b9cc5ab83a52287ac3a-0","0x059edd72cd353df5106d2b9cc5ab83a52287ac3a","Chromie Squiggle","True","False","True","https://www.twitter.com/artonblockchain" 3 | "0x059edd72cd353df5106d2b9cc5ab83a52287ac3a-1","0x059edd72cd353df5106d2b9cc5ab83a52287ac3a","Genesis","True","True","True","https://www.instagram.com/dacaldera/" 4 | "0x059edd72cd353df5106d2b9cc5ab83a52287ac3a-2","0x059edd72cd353df5106d2b9cc5ab83a52287ac3a","Construction Token","True","True","True","https://www.jeffgdavis.com/" 5 | "0x28f2d3805652fb5d359486dffb7d08320d403240-0","0x28f2d3805652fb5d359486dffb7d08320d403240","The Family Mooks","True","False","False","" 6 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-10","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","NimBuds","True","True","True","" 7 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-100","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","CENTURY","True","True","False","https://reas.com" 8 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-101","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Enchiridion","True","True","False","https://instagram.com/generativeartworks/" 9 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-102","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","I Saw It in a Dream","True","True","False","https://steviep.xyz/i-saw-it-in-a-dream" 10 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-103","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Octo Garden","True","True","False","https://www.richlord.com/octo-garden" 11 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-104","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Eccentrics","True","True","False","" 12 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-105","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Gizmobotz","True","True","False","https://www.instagram.com/cottonchipper/" 13 | 
"0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-106","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Radiance","True","True","False","https://www.instagram.com/julienv3ga/" 14 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-107","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Low Tide","True","True","False","https://linktr.ee/LowTideAB" 15 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-108","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Divisions","True","True","False","https://linktr.ee/_mconnolly_" 16 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-109","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Speckled Summits","True","True","False","https://twitter.com/purphat" 17 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-11","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","HyperHash","True","True","True","https://www.beervangeer.nl/hyperhash/" 18 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-110","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Lava Glow","True","True","False","https://twitter.com/jhelf" 19 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-111","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","70s Pop Ghost Bonus Pack 👻","True","True","False","https://70sPop.love" 20 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-112","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Alien Clock","True","True","False","https://www.theblocksofart.com/" 21 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-113","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","celestial cyclones","True","True","False","https://hideocode.art" 22 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-114","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","glitch crystal monsters","True","True","False","https://instagram.com/alidasun" 23 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-115","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Dot Grid","True","True","False","https://www.theelephantnl.art" 24 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-116","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Flowers","True","True","False","https://rvig.art/Flowers.html" 25 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-117","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Transitions","True","True","False","https://mattbilfield.com/transitions-on-artblocks" 26 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-118","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","LeWitt Generator Generator","True","True","False","https://chan.gallery" 27 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-119","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Ecumenopolis","True","True","False","https://www.instagram.com/gengeomergence/" 28 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-12","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Unigrids","True","True","True","https://zeblocks.com/" 29 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-120","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Endless Nameless","True","True","False","https://www.newrafael.com/" 30 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-121","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Rinascita","True","True","False","https://stefanocontiero.com/h" 31 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-122","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Cells","True","True","False","https://twitter.com/HeveyArt" 32 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-123","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Nucleus","True","True","False","https://twitter.com/HjalmarAstrom" 33 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-124","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","The Liths of Sisyphus","True","True","False","https://www.instagram.com/_nonfigurativ_/" 
34 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-125","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Calendart","True","True","False","https://twitter.com/ssteeenn" 35 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-126","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Timepiece","True","True","False","https://twitter.com/wawaa_studio" 36 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-127","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Labyrometry","True","True","False","https://twitter.com/prettyblocks" 37 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-128","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Scribbled Boundaries","False","False","False","" 38 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-129","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Pigments","True","True","False","https://www.instagram.com/darien.brito/" 39 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-13","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Ringers","True","True","True","https://twitter.com/dmitricherniak" 40 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-130","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Obicera","True","True","False","" 41 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-131","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Scribbled Boundaries","True","True","False","https://scribbled-boundaries.webflow.io" 42 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-132","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Tangled","True","True","False","https://www.instagram.com/superblob/" 43 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-133","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Organized Disruption","True","True","False","https://linktr.ee/praystation" 44 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-134","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Wave Schematics","True","True","False","https://twitter.com/luxpris" 45 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-135","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Brushpops","True","True","False","https://supersize.co.il/portfolio/brushpops/" 46 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-136","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","SpiroFlakes","True","True","False","https://linktr.ee/artBoffin" 47 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-137","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Alien Insects","True","True","False","https://www.shvembldr.com/" 48 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-138","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Geometry Runners","True","True","False","https://www.richlord.com/" 49 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-139","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Eccentrics 2: Orbits","True","True","False","https://twitter.com/robdixon" 50 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-14","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Cyber Cities","True","True","False","" 51 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-140","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Good Vibrations","True","True","False","https://twitter.com/IOivm" 52 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-141","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Rapture","True","True","False","https://data-imaginist.com/rapture" 53 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-142","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Unknown Signals","True","True","False","https://twitter.com/_k0ch" 54 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-143","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","phase","True","True","False","https://twitter.com/LorenBednar" 55 | 
"0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-144","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","autoRAD","True","True","False","http://www.sgtslaughtermelon.com/art" 56 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-145","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Beatboxes","True","True","False","https://zeblocks.com/" 57 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-146","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Neighborhood","True","True","False","https://www.habitat.org/" 58 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-147","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Trossets","True","True","False","https://www.annacarreras.com" 59 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-148","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","ørß1t$","False","False","False","https://twitter.com/berkozdemir" 60 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-149","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Dot Matrix Gradient Study","True","True","False","https://twitter.com/purphat" 61 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-15","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Utopia","True","True","False","https://twitter.com/ge1doot" 62 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-150","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","PrimiLife","True","True","False","https://wuwa.org/primitives" 63 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-151","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","High Tide","True","True","False","https://linktr.ee/LowTideAB" 64 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-152","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Fake Internet Money","True","True","False","https://steviep.xyz/fake-internet-money" 65 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-153","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","We","True","True","False","https://vamoss.com.br" 66 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-154","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Warp","True","True","False","" 67 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-155","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Undefined","False","False","False","" 68 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-156","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Moments","True","True","False","https://twitter.com/r4v3n_art" 69 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-157","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","UltraWave 369","True","True","False","http://www.beervangeer.nl" 70 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-158","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","a heart and a soul","True","True","False","https://twitter.com/sirdiekblak" 71 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-159","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Fragments of an Infinite Field","True","True","False","" 72 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-16","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Color Study","True","True","False","https://www.jeffgdavis.com" 73 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-160","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Seadragons","True","True","False","http://marcinignac.com/projects/seadragons/" 74 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-161","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","spawn","True","True","False","https://johnprovencher.com" 75 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-162","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Democracity","True","True","False","https://instagram.com/generativeartworks/" 76 | 
"0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-163","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Meridian","True","True","False","https://mattdesl.com/" 77 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-164","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Phototaxis","True","True","False","https://reas.com" 78 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-165","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Gravity 16","True","True","False","https://herdberg.com" 79 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-166","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Ouroboros","True","True","False","https://raregonzo.art" 80 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-167","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Blaschke Ballet","True","True","False","https://www.instagram.com/numbersinmotion/" 81 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-168","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Bloom","True","False","False","https://blockchance.io" 82 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-169","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Augmented Sequence","True","True","False","https://toiminto.swaeg.net" 83 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-17","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Spectron","True","True","True","https://spectron.netlify.app" 84 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-170","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Chroma Theory","True","False","False","https://www.pdudko.com/p/chroma-theory.html" 85 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-171","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Himinn","True","True","False","https://twitter.com/sarah_ridgley" 86 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-172","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Rituals - Venice","True","True","False","https://www.ritualsirl.com/" 87 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-173","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Skulptuur","True","True","False","https://piterpasma.nl/skulptuur" 88 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-174","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Letters to My Future Self","True","True","False","https://www.instagram.com/rwstruhl/" 89 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-175","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","mono no aware","True","False","False","http://ixnayokay.art" 90 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-176","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Color Gradient Studies","False","False","False","https://www.instagram.com/zach.lieberman/" 91 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-177","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Space Birds","True","False","False","https://medium.com/@markcotton_81658/inspiration-for-and-development-of-space-birds-on-artblocks-io-560af0d1a6a9" 92 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-178","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Beauty in the Hurting","True","True","False","https://linktr.ee/ryangreen8" 93 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-179","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","8","True","False","False","https://collect.bardionson.com/eight" 94 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-18","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Gen 2","True","True","False","" 95 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-180","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","mecha suits","True","True","False","https://hideocode.art/" 96 | 
"0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-181","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","FOCUS","True","False","False","https://zenerative.com/focus/" 97 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-182","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Amoeba","True","False","False","https://lasteven.xyz" 98 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-183","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Quarantine","True","False","False","https://www.owenmoore.art/artwork/quarantine" 99 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-184","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","Swing","False","False","False","https://www.eltono.com/lab/projects/swing/" 100 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-185","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","little boxes on the hillsides, child","True","False","False","https://liaworks.com" 101 | "0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270-186","0xa7d8d9ef8d8ce8992df33d8b8cf4aebabd5bd270","THE SOURCE CoDE","False","False","False","" 102 | -------------------------------------------------------------------------------- /memgraph/query_modules/amazon_books_analysis.py: -------------------------------------------------------------------------------- 1 | import mgp 2 | from queue import PriorityQueue 3 | 4 | ''' 5 | Sample trigger for calling write procedure: 6 | 7 | CREATE TRIGGER newBookRating 8 | ON CREATE BEFORE COMMIT EXECUTE 9 | UNWIND createdEdges AS e 10 | CALL amazon_book_analysis.new_rating(e) YIELD *; 11 | ''' 12 | 13 | @mgp.write_proc 14 | def new_rating( 15 | context: mgp.ProcCtx, 16 | rating: mgp.Edge 17 | ) -> mgp.Record(Rating = mgp.Nullable[mgp.Edge], 18 | Book = mgp.Nullable[mgp.Vertex]): 19 | if rating.type.name == "RATED": 20 | book = rating.to_vertex 21 | book_rating = rating.properties.get("rating") 22 | rating_sum = book.properties.get("rating_sum") 23 | if rating_sum == None: 24 | book.properties.set("rating_sum", book_rating) 25 | book.properties.set("num_of_ratings", 1) 26 | else: 27 | current_rating = rating_sum + book_rating 28 | book.properties.set("rating_sum", current_rating) 29 | book.properties.set("num_of_ratings", book.properties.get("num_of_ratings") + 1) 30 | return mgp.Record(Rating=rating, Book=book) 31 | return mgp.Record(Rating=None, Book=None) 32 | 33 | ''' 34 | Sample Query module call returns 10 books (if there are 10) with 60 or more ratings. 
35 | CALL amazon_books_analysis.best_rated_books(10, 60) 36 | YIELD best_rated_books 37 | UNWIND best_rated_books AS Book 38 | WITH Book[0] AS Rating, Book[1] AS Title 39 | RETURN Rating, Title 40 | ''' 41 | 42 | @mgp.read_proc 43 | def best_rated_books( 44 | context: mgp.ProcCtx, 45 | number_of_books: int, 46 | ratings_threshold: int 47 | 48 | ) -> mgp.Record(best_rated_books=list): 49 | 50 | q = PriorityQueue(maxsize=number_of_books)  # min-heap of (average rating, title) pairs 51 | for book in context.graph.vertices: 52 | label, = book.labels 53 | if label.name == "Book": 54 | num_of_ratings = book.properties.get("num_of_ratings") 55 | title = book.properties.get("title") 56 | if num_of_ratings is not None and num_of_ratings >= ratings_threshold: 57 | rating = book.properties.get("rating_sum")/num_of_ratings 58 | if not q.full(): 59 | q.put((rating, title)) 60 | else: 61 | top = q.get()  # lowest-rated entry currently in the heap 62 | if top[0] > rating: 63 | q.put(top) 64 | else: 65 | q.put((rating, title))  # keep whichever of the two is rated higher 66 | 67 | 68 | books = list() 69 | while not q.empty(): 70 | books.append(q.get()) 71 | 72 | books.reverse()  # best-rated first 73 | return mgp.Record(best_rated_books=books) 74 | 75 | """ 76 | MATCH (u:User {id: "A3NNFCL3ORBQUI"}) CALL amazon_books_analysis.recommend_books_for_user(u, 10, 60) 77 | YIELD recommended_books 78 | UNWIND recommended_books AS Book 79 | WITH Book[0] AS Rating, Book[1] AS Title 80 | RETURN Rating, Title 81 | """ 82 | 83 | 84 | @mgp.read_proc 85 | def recommend_books_for_user( 86 | context: mgp.ProcCtx, 87 | user: mgp.Vertex, 88 | number_of_books: int, 89 | ratings_threshold: int 90 | 91 | ) -> mgp.Record(recommended_books=list): 92 | 93 | rated_books = [] 94 | for user_ratings in user.out_edges: 95 | user_book = user_ratings.to_vertex 96 | rated_books.append(user_book.id)  # books the user has already rated are excluded below 97 | 98 | q = PriorityQueue(maxsize=number_of_books) 99 | for book in context.graph.vertices: 100 | label, = book.labels 101 | if label.name == "Book" and book.id not in rated_books: 102 | num_of_ratings = book.properties.get("num_of_ratings") 103 | title = book.properties.get("title") 104 | if num_of_ratings is not None and num_of_ratings >= ratings_threshold: 105 | rating = book.properties.get("rating_sum")/num_of_ratings 106 | if not q.full(): 107 | q.put((rating, title)) 108 | else: 109 | top = q.get() 110 | if top[0] > rating: 111 | q.put(top) 112 | else: 113 | q.put((rating, title)) 114 | 115 | 116 | books = list() 117 | while not q.empty(): 118 | books.append(q.get()) 119 | 120 | books.reverse() 121 | return mgp.Record(recommended_books=books) 122 | -------------------------------------------------------------------------------- /memgraph/query_modules/movielens_analysis.py: -------------------------------------------------------------------------------- 1 | """ 2 | Sample trigger for calling the write procedure: 3 | 4 | CREATE TRIGGER newMovieRating 5 | ON CREATE BEFORE COMMIT EXECUTE 6 | UNWIND createdEdges AS e 7 | CALL movielens_analysis.new_rating(e) YIELD *; 8 | """ 9 | import mgp 10 | from queue import PriorityQueue 11 | 12 | @mgp.write_proc 13 | def new_rating( 14 | context: mgp.ProcCtx, 15 | rating: mgp.Edge 16 | ) -> mgp.Record(Rating=mgp.Nullable[mgp.Edge], 17 | Movie=mgp.Nullable[mgp.Vertex]): 18 | if rating.type.name == "RATED": 19 | movie = rating.to_vertex 20 | movie_rating = rating.properties.get("rating") 21 | rating_sum = movie.properties.get("rating_sum") 22 | if rating_sum is None: 23 | movie.properties.set("rating_sum", movie_rating) 24 | movie.properties.set("num_of_ratings", 1) 25 | else: 26 | current_rating = rating_sum + movie_rating 27 | 
movie.properties.set("rating_sum", current_rating) 28 | movie.properties.set("num_of_ratings", movie.properties.get("num_of_ratings") + 1) 29 | return mgp.Record(Rating=rating, Movie=movie) 30 | return mgp.Record(Rating=None, Movie=None) 31 | 32 | 33 | """ 34 | Sample query module call that returns 10 movies (if there are 10) that have 20 or more ratings. 35 | CALL movielens_analysis.best_rated_movies(10, 20) 36 | YIELD best_rated_movies 37 | UNWIND best_rated_movies AS Movie 38 | WITH Movie[0] AS Rating, Movie[1] as Title 39 | RETURN Rating, Title 40 | 41 | """ 42 | 43 | @mgp.read_proc 44 | def best_rated_movies( 45 | context: mgp.ProcCtx, 46 | number_of_movies: int, 47 | ratings_treshold: int 48 | ) -> mgp.Record(best_rated_movies = list): 49 | 50 | q = PriorityQueue(maxsize=number_of_movies) 51 | for movie in context.graph.vertices: 52 | label, = movie.labels 53 | if label.name == "Movie": 54 | num_of_ratings = movie.properties.get("num_of_ratings") 55 | title = movie.properties.get("title") 56 | if num_of_ratings != None and num_of_ratings >= ratings_treshold: 57 | rating = movie.properties.get("rating_sum")/num_of_ratings 58 | if q.empty() or not q.full(): 59 | q.put((rating, title)) 60 | else: 61 | top = q.get() 62 | if top[0] > rating: 63 | q.put(top) 64 | else: 65 | q.put((rating, title)) 66 | 67 | movies = list() 68 | while not q.empty(): 69 | movies.append(q.get()) 70 | 71 | movies.reverse() 72 | return mgp.Record(best_rated_movies=movies) 73 | 74 | """ 75 | Sample query call that returns worst rated 5 movies (if there are 5) that have 8 or more ratings. 76 | CALL movielens_analysis.worst_rated_movies(5, 8) 77 | YIELD worst_rated_movies 78 | UNWIND worst_rated_movies AS Movie 79 | WITH Movie[0] AS Rating, Movie[1] as Title 80 | RETURN Rating, Title 81 | 82 | """ 83 | 84 | @mgp.read_proc 85 | def worst_rated_movies( 86 | context: mgp.ProcCtx, 87 | number_of_movies: int, 88 | ratings_treshold: int 89 | ) -> mgp.Record(worst_rated_movies = list): 90 | 91 | q = PriorityQueue(maxsize=number_of_movies) 92 | for movie in context.graph.vertices: 93 | label, = movie.labels 94 | if label.name == "Movie": 95 | num_of_ratings = movie.properties.get("num_of_ratings") 96 | title = movie.properties.get("title") 97 | if num_of_ratings != None and num_of_ratings >= ratings_treshold: 98 | rating = movie.properties.get("rating_sum")/num_of_ratings 99 | rating = rating * -1 100 | if q.empty() or not q.full(): 101 | q.put((rating, title)) 102 | else: 103 | top = q.get() 104 | if top[0] > rating: 105 | q.put(top) 106 | else: 107 | q.put((rating, title)) 108 | 109 | movies = list() 110 | while not q.empty(): 111 | rating, title = q.get() 112 | rating = abs(rating) 113 | movies.append((rating, title)) 114 | 115 | movies.reverse() 116 | return mgp.Record(worst_rated_movies=movies) 117 | -------------------------------------------------------------------------------- /memgraph/transformations/amazon_books.py: -------------------------------------------------------------------------------- 1 | import mgp 2 | import json 3 | 4 | @mgp.transformation 5 | def book_ratings(messages: mgp.Messages 6 | )-> mgp.Record(query=str, parameters=mgp.Nullable[mgp.Map]): 7 | result_queries = [] 8 | 9 | for i in range(messages.total_messages()): 10 | message = messages.message_at(i) 11 | books_dict = json.loads(message.payload().decode('utf8')) 12 | result_queries.append( 13 | mgp.Record( 14 | query=("MERGE (b:Book {id: $bookId, title: $title}) " 15 | "MERGE (u:User {id: $userId}) " 16 | "WITH u, b " 17 | "CREATE 
(u)-[r:RATED {rating: ToFloat($rating), timestamp: $timestamp}]->(b)"), 18 | parameters={ 19 | "bookId": books_dict["bookId"], 20 | "userId": books_dict["userId"], 21 | "rating": books_dict["rating"], 22 | "timestamp": books_dict["timestamp"], 23 | "title": books_dict["title"] 24 | })) 25 | 26 | return result_queries 27 | -------------------------------------------------------------------------------- /memgraph/transformations/artblocks.py: -------------------------------------------------------------------------------- 1 | import mgp 2 | import json 3 | 4 | 5 | @mgp.transformation 6 | def sales(messages: mgp.Messages 7 | ) -> mgp.Record(query=str, parameters=mgp.Nullable[mgp.Map]): 8 | 9 | result_queries = [] 10 | 11 | for i in range(messages.total_messages()): 12 | message = messages.message_at(i) 13 | sale_info = json.loads(message.payload().decode('utf8')) 14 | result_queries.append( 15 | mgp.Record( 16 | query=(  # note the trailing spaces inside the literals: the originals concatenated the clauses with no whitespace between them 17 | "CREATE (s:Sale {sale_id: $sale_id, payment_token: $payment_token, price: $price, datetime: $datetime}) " 18 | "MERGE (p:Project {project_id: $project_id}) " 19 | "CREATE (p)-[:HAS]->(s) " 20 | "MERGE (a:Account {account_id: $seller_id}) " 21 | "CREATE (a)-[:IS_SELLING]->(s) " 22 | "MERGE (b:Account {account_id: $buyer_id}) " 23 | "CREATE (b)-[:IS_BUYING]->(s) " 24 | "MERGE (t:Token {token_id: $token_id}) " 25 | "CREATE (t)-[:IS_SOLD_IN]->(s)"), 26 | parameters={ 27 | "project_id": sale_info["project_id"], 28 | "seller_id": sale_info["seller_id"], 29 | "buyer_id": sale_info["buyer_id"], 30 | "token_id": sale_info["token_id"], 31 | "sale_id": sale_info["sale_id"], 32 | "payment_token": sale_info["payment_token"], 33 | "price": sale_info["price"], 34 | "datetime": sale_info["datetime"] 35 | })) 36 | return result_queries 37 | -------------------------------------------------------------------------------- /memgraph/transformations/github_commits.py: -------------------------------------------------------------------------------- 1 | import mgp 2 | import json 3 | 4 | @mgp.transformation 5 | def commit(messages: mgp.Messages 6 | ) -> mgp.Record(query=str, parameters=mgp.Nullable[mgp.Map]): 7 | result_queries = [] 8 | 9 | for i in range(messages.total_messages()): 10 | message = messages.message_at(i) 11 | commit_dict = json.loads(message.payload().decode('utf8')) 12 | result_queries.append( 13 | mgp.Record( 14 | query=("MERGE (u1:User {username: $author}) " 15 | "MERGE (c:Commit {id: $commit}) " 16 | "MERGE (c)-[:CREATED_BY]->(u1) " 17 | "WITH u1 " 18 | "UNWIND $followers AS follower "  # caveat: if $followers is empty, UNWIND yields no rows and the rest of this query is skipped 19 | "MERGE (u2:User {username: follower}) " 20 | "MERGE (u2)-[:FOLLOWS]->(u1) " 21 | "WITH u1 " 22 | "UNWIND $following AS follows " 23 | "MERGE (u3:User {username: follows}) " 24 | "MERGE (u3)<-[:FOLLOWS]-(u1) "), 25 | parameters={ 26 | "commit": commit_dict["commit"], 27 | "author": commit_dict["author"], 28 | "followers": commit_dict["followers"], 29 | "following": commit_dict["following"] 30 | })) 31 | 32 | return result_queries 33 | -------------------------------------------------------------------------------- /memgraph/transformations/movielens.py: -------------------------------------------------------------------------------- 1 | import mgp 2 | import json 3 | 4 | 5 | @mgp.transformation 6 | def rating(messages: mgp.Messages 7 | ) -> mgp.Record(query=str, parameters=mgp.Nullable[mgp.Map]): 8 | result_queries = [] 9 | 10 | for i in range(messages.total_messages()): 11 | message = messages.message_at(i) 12 | movie_dict = json.loads(message.payload().decode('utf8')) 13 | result_queries.append( 
14 | mgp.Record( 15 | query=("MERGE (u:User {id: $userId}) " 16 | "MERGE (m:Movie {id: $movieId, title: $title}) " 17 | "WITH u, m " 18 | "UNWIND $genres AS genre " 19 | "MERGE (m)-[:OF_GENRE]->(:Genre {name: genre}) " 20 | "MERGE (u)-[r:RATED {rating: ToFloat($rating), timestamp: $timestamp}]->(m)"), 21 | parameters={ 22 | "userId": movie_dict["userId"], 23 | "movieId": movie_dict["movie"]["movieId"], 24 | "title": movie_dict["movie"]["title"], 25 | "genres": movie_dict["movie"]["genres"], 26 | "rating": movie_dict["rating"], 27 | "timestamp": movie_dict["timestamp"]})) 28 | 29 | return result_queries 30 | -------------------------------------------------------------------------------- /platform_variables.env: -------------------------------------------------------------------------------- 1 | KAFKA=False 2 | REDPANDA=False 3 | RABBITMQ=False 4 | PULSAR=False 5 | KAFKA_IP=kafka 6 | KAFKA_PORT=9092 7 | REDPANDA_IP=redpanda 8 | REDPANDA_PORT=29092 9 | RABBITMQ_IP=rabbitmq 10 | RABBITMQ_PORT=5672 11 | PULSAR_IP=pulsar 12 | PULSAR_PORT=6650 13 | -------------------------------------------------------------------------------- /start.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import re 4 | import socket 5 | import subprocess 6 | 7 | from time import sleep 8 | 9 | 10 | KAFKA_PORT = os.getenv('KAFKA_PORT', '9092') 11 | REDPANDA_PORT = os.getenv('REDPANDA_PORT', '29092') 12 | RABBITMQ_PORT = os.getenv('RABBITMQ_PORT', '5672') 13 | PULSAR_PORT = os.getenv('PULSAR_PORT', '6650') 14 | ZOOKEEPER_PORT_FULL = os.getenv('KAFKA_CFG_ZOOKEEPER_CONNECT', 'zookeeper:2181') 15 | ZOOKEEPER_PORT = re.findall(r'\d+', ZOOKEEPER_PORT_FULL)[0] 16 | DATASETS = ["art-blocks", "github", "movielens", "amazon-books"] 17 | 18 | def parse_arguments(): 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument("--platforms", nargs="+", choices=["kafka", "redpanda", "rabbitmq", "pulsar"], 21 | default=["kafka", "redpanda", "rabbitmq", "pulsar"]) 22 | parser.add_argument("--dataset", type=str, 23 | choices=DATASETS + ["all"], default="all") 24 | 25 | value = parser.parse_args() 26 | return value 27 | 28 | 29 | def docker_build_run(platforms, dataset_list): 30 | # build all chosen platforms 31 | for platform in platforms: 32 | subprocess.call("docker-compose build " + platform, shell=True) 33 | 34 | # build datasets 35 | for dataset in dataset_list: 36 | subprocess.call("docker-compose build " + dataset, shell=True) 37 | 38 | for platform in platforms: 39 | subprocess.call( 40 | "docker-compose up -d " + platform, shell=True) 41 | 42 | # env-file: KAFKA, REDPANDA, RABBITMQ, PULSAR - default False 43 | # adding -e KAFKA=True -e REDPANDA=True will change those env vars 44 | 45 | list_of_ports = list() 46 | env_var = "" 47 | for platform in platforms: 48 | env_var += " " + "-e " + platform.upper() + "=True" 49 | list_of_ports.append(platform.upper() + "_PORT") 50 | 51 | # TODO: check if PULSAR is really running - not based on port 52 | sleep(8) 53 | 54 | retries = 30 55 | 56 | ports_not_used = True 57 | while retries > 0 and ports_not_used: 58 | ports_not_used = False 59 | for port in list_of_ports: 60 | print("Checking port " + globals()[port]) 61 | test_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 62 | if test_socket.connect_ex(('localhost', int(globals()[port]))) != 0: 63 | ports_not_used = True 64 | print("Platform at port " + 65 | globals()[port] + " has not started.") 66 | test_socket.close() 67 | retries -= 1 68 | sleep(1) 69 | sleep(10) 70 | 
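# the sleep(10) above is presumably a grace period: a port can accept connections before its broker is fully initialized (cf. the TODO about checking Pulsar properly)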
71 | if ports_not_used: 72 | print("Streaming platforms are not running correctly.") 73 | return 74 | 75 | for dataset in dataset_list: 76 | subprocess.call("docker-compose run -d" + 77 | env_var + " " + dataset, shell=True) 78 | 79 | 80 | def is_port_in_use(): 81 | all_ports = ["ZOOKEEPER_PORT", "KAFKA_PORT", 82 | "REDPANDA_PORT", "RABBITMQ_PORT", "PULSAR_PORT"] 83 | 84 | for port in all_ports: 85 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as test_socket: 86 | if test_socket.connect_ex(('localhost', int(globals()[port]))) == 0: 87 | return True 88 | # the with-block closes the socket even on the early return above 89 | return False 90 | 91 | 92 | def main(): 93 | platforms = list() 94 | value = parse_arguments() 95 | platforms = value.platforms 96 | dataset_list = [value.dataset] 97 | if value.dataset == "all": 98 | dataset_list = DATASETS 99 | 100 | subprocess.call("docker-compose rm -sf", shell=True) 101 | if not is_port_in_use(): 102 | docker_build_run(platforms, dataset_list) 103 | else: 104 | print("Ports are in use. Try stopping services on the necessary ports and running the script again.") 105 | 106 | 107 | if __name__ == "__main__": 108 | main() 109 | -------------------------------------------------------------------------------- /stream/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memgraph/data-streams/640eada56bcd11673e8fe87ea863afbb852c6582/stream/__init__.py -------------------------------------------------------------------------------- /stream/apache_pulsar.py: -------------------------------------------------------------------------------- 1 | from time import sleep 2 | import json 3 | import pulsar 4 | 5 | 6 | def producer(ip, port, topic, generate, stream_delay): 7 | client = pulsar.Client('pulsar://' + ip + ':' + port) 8 | producer = client.create_producer(topic) 9 | message = generate() 10 | while True: 11 | try: 12 | producer.send(json.dumps(next(message)).encode('utf8')) 13 | sleep(stream_delay) 14 | except Exception as e: 15 | print(f"Error: {e}") 16 | 17 | 18 | def consumer(ip, port, topic, platform): 19 | client = pulsar.Client('pulsar://' + ip + ':' + port) 20 | consumer = client.subscribe(topic, 'my-subscription') 21 | while True: 22 | msg = consumer.receive() 23 | try: 24 | print(platform, ": ", msg.data()) 25 | consumer.acknowledge(msg) 26 | except Exception: 27 | consumer.negative_acknowledge(msg) 28 | client.close()  # note: unreachable while the receive loop runs forever 29 | -------------------------------------------------------------------------------- /stream/kafka_redpanda.py: -------------------------------------------------------------------------------- 1 | from kafka import KafkaConsumer, KafkaProducer 2 | from kafka.admin import KafkaAdminClient, NewTopic 3 | from kafka.errors import TopicAlreadyExistsError, NoBrokersAvailable 4 | from time import sleep 5 | import json 6 | 7 | 8 | def get_admin_client(ip, port, kafka_username, kafka_password): 9 | retries = 30 10 | while True: 11 | try: 12 | admin_client = KafkaAdminClient( 13 | bootstrap_servers=ip + ':' + port, 14 | client_id="test", 15 | security_protocol="SASL_PLAINTEXT", 16 | sasl_mechanism="PLAIN", 17 | sasl_plain_username=kafka_username, 18 | sasl_plain_password=kafka_password) 19 | return admin_client 20 | except NoBrokersAvailable: 21 | retries -= 1 22 | if not retries: 23 | raise 24 | sleep(1) 25 | 26 | 27 | def consumer(ip, port, topic, platform): 28 | consumer = KafkaConsumer(topic, 29 | bootstrap_servers=ip + ':' + port, 30 | auto_offset_reset='earliest', 31 | group_id=None, 32 | security_protocol="SASL_PLAINTEXT", 33 
| sasl_mechanism="PLAIN", 34 | sasl_plain_username="public", 35 | sasl_plain_password="public") 36 | try: 37 | while True: 38 | msg_pack = consumer.poll() 39 | if not msg_pack: 40 | sleep(1) 41 | continue 42 | for _, messages in msg_pack.items(): 43 | for message in messages: 44 | message = json.loads(message.value.decode('utf8')) 45 | print(platform, ": ", str(message)) 46 | 47 | except Exception as e: 48 | print(f"Error: {e}") 49 | 50 | 51 | def create_topic(ip, port, topic, kafka_username, kafka_password): 52 | admin_client = get_admin_client(ip, port, kafka_username, kafka_password) 53 | my_topic = [ 54 | NewTopic(name=topic, num_partitions=1, replication_factor=1)] 55 | try: 56 | admin_client.create_topics(new_topics=my_topic, validate_only=False) 57 | except TopicAlreadyExistsError: 58 | pass 59 | print(f"All topics: {admin_client.list_topics()}") 60 | 61 | 62 | def create_kafka_producer(ip, port, kafka_username, kafka_password): 63 | retries = 30 64 | while True: 65 | try: 66 | producer = KafkaProducer( 67 | bootstrap_servers=ip + ':' + port, 68 | security_protocol="SASL_PLAINTEXT", 69 | sasl_mechanism="PLAIN", 70 | sasl_plain_username=kafka_username, 71 | sasl_plain_password=kafka_password) 72 | return producer 73 | except NoBrokersAvailable: 74 | retries -= 1 75 | if not retries: 76 | raise 77 | print("Failed to connect to Kafka") 78 | sleep(1) 79 | 80 | 81 | def producer(ip, port, topic, kafka_username, kafka_password, generate, stream_delay): 82 | producer = create_kafka_producer(ip, port, kafka_username, kafka_password) 83 | message = generate() 84 | while True: 85 | try: 86 | msg = json.dumps(next(message)).encode('utf8') 87 | producer.send(topic, msg) 88 | print(msg) 89 | producer.flush() 90 | sleep(stream_delay) 91 | except Exception as e: 92 | print(f"Error: {e}") 93 | -------------------------------------------------------------------------------- /stream/producer.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Process 2 | import argparse 3 | import os 4 | import pika 5 | import pulsar 6 | import stream.apache_pulsar as apache_pulsar 7 | import stream.kafka_redpanda as kafka_redpanda 8 | import stream.rabbitmq as rabbitmq 9 | 10 | KAFKA_IP = os.getenv('KAFKA_IP', 'localhost') 11 | KAFKA_PORT = os.getenv('KAFKA_PORT', '9093') 12 | KAFKA_TOPIC = os.getenv('KAFKA_TOPIC', 'movielens') 13 | KAFKA_USERNAME = os.getenv('KAFKA_USERNAME', 'admin') 14 | KAFKA_PASSWORD = os.getenv('KAFKA_PASSWORD', 'admin') 15 | REDPANDA_IP = os.getenv('REDPANDA_IP', 'localhost') 16 | REDPANDA_PORT = os.getenv('REDPANDA_PORT', '29092') 17 | REDPANDA_TOPIC = os.getenv('REDPANDA_TOPIC', 'movielens') 18 | RABBITMQ_IP = os.getenv('RABBITMQ_IP', 'localhost') 19 | RABBITMQ_PORT = os.getenv('RABBITMQ_PORT', '5672') 20 | RABBITMQ_QUEUE = os.getenv('RABBITMQ_QUEUE', 'movielens') 21 | PULSAR_IP = os.getenv('PULSAR_IP', 'localhost') 22 | PULSAR_PORT = os.getenv('PULSAR_PORT', '6650') 23 | PULSAR_TOPIC = os.getenv('PULSAR_TOPIC', 'movielens') 24 | KAFKA = os.getenv('KAFKA', 'False') 25 | REDPANDA = os.getenv('REDPANDA', 'False') 26 | RABBITMQ = os.getenv('RABBITMQ', 'False') 27 | PULSAR = os.getenv('PULSAR', 'False') 28 | 29 | 30 | def restricted_float(x): 31 | try: 32 | x = float(x) 33 | except ValueError: 34 | raise argparse.ArgumentTypeError("%r not a floating-point literal" % (x,)) 35 | if x < 0.0 or x > 3.0: 36 | raise argparse.ArgumentTypeError("%r not in range [0.0, 3.0]" % (x,)) 37 | return x 38 | 39 | 40 | def str2bool(v): 41 | if 
isinstance(v, bool): 42 | return v 43 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 44 | return True 45 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 46 | return False 47 | else: 48 | raise argparse.ArgumentTypeError('Boolean value expected.') 49 | 50 | 51 | def parse_arguments(): 52 | parser = argparse.ArgumentParser() 53 | parser.add_argument("--stream-delay", type=restricted_float, default=2.0, 54 | help="Seconds to wait before producing a new message (MIN=0.0, MAX=3.0).") 55 | parser.add_argument("--consumer", type=str2bool, nargs='?', const=True, default=False, 56 | help="Start consumers.") 57 | value = parser.parse_args() 58 | return value 59 | 60 | 61 | def run(generate): 62 | args = parse_arguments() 63 | process_list = list() 64 | 65 | if KAFKA == 'True': 66 | kafka_redpanda.create_topic(KAFKA_IP, KAFKA_PORT, KAFKA_TOPIC, KAFKA_USERNAME, KAFKA_PASSWORD) 67 | 68 | p1 = Process(target=lambda: kafka_redpanda.producer( 69 | KAFKA_IP, KAFKA_PORT, KAFKA_TOPIC, KAFKA_USERNAME, KAFKA_PASSWORD, generate, args.stream_delay)) 70 | p1.start() 71 | process_list.append(p1) 72 | 73 | if args.consumer: 74 | p2 = Process(target=lambda: kafka_redpanda.consumer( 75 | KAFKA_IP, KAFKA_PORT, KAFKA_TOPIC, "Kafka")) 76 | p2.start() 77 | process_list.append(p2) 78 | 79 | if REDPANDA == 'True': 80 | p3 = Process(target=lambda: kafka_redpanda.producer( 81 | REDPANDA_IP, REDPANDA_PORT, REDPANDA_TOPIC, "public", "public", generate, args.stream_delay))  # assumed fix: the original call omitted the SASL credentials that kafka_redpanda.producer expects; "public" mirrors the hardcoded consumer credentials 82 | p3.start() 83 | process_list.append(p3) 84 | 85 | if args.consumer: 86 | p4 = Process(target=lambda: kafka_redpanda.consumer( 87 | REDPANDA_IP, REDPANDA_PORT, REDPANDA_TOPIC, "Redpanda")) 88 | p4.start() 89 | process_list.append(p4) 90 | 91 | if RABBITMQ == 'True': 92 | p5 = Process(target=lambda: rabbitmq.producer( 93 | RABBITMQ_IP, RABBITMQ_PORT, RABBITMQ_QUEUE, generate, args.stream_delay)) 94 | p5.start() 95 | process_list.append(p5) 96 | 97 | if args.consumer: 98 | p6 = Process(target=lambda: rabbitmq.consumer( 99 | RABBITMQ_IP, RABBITMQ_PORT, RABBITMQ_QUEUE, "RabbitMQ")) 100 | p6.start() 101 | process_list.append(p6) 102 | 103 | if PULSAR == 'True': 104 | p7 = Process(target=lambda: apache_pulsar.producer( 105 | PULSAR_IP, PULSAR_PORT, PULSAR_TOPIC, generate, args.stream_delay)) 106 | p7.start() 107 | process_list.append(p7) 108 | 109 | #if args.consumer: 110 | # p8 = Process(target=lambda: apache_pulsar.consumer( 111 | # PULSAR_IP, PULSAR_PORT, PULSAR_TOPIC, "Pulsar")) 112 | # p8.start() 113 | # process_list.append(p8) 114 | 115 | for process in process_list: 116 | process.join() 117 | -------------------------------------------------------------------------------- /stream/rabbitmq.py: -------------------------------------------------------------------------------- 1 | from time import sleep 2 | import json 3 | import pika 4 | 5 | 6 | def producer(ip, port, queue, generate, stream_delay): 7 | connection = pika.BlockingConnection( 8 | pika.ConnectionParameters(host=ip, port=int(port)))  # use the port argument instead of pika's implicit default 9 | channel = connection.channel() 10 | channel.queue_declare(queue=queue) 11 | message = generate() 12 | while True: 13 | try: 14 | channel.basic_publish( 15 | exchange='', routing_key=queue, body=json.dumps(next(message)).encode('utf8')) 16 | sleep(stream_delay) 17 | except Exception as e: 18 | print(f"Error: {e}") 19 | 20 | 21 | def consumer(ip, port, queue, platform): 22 | connection = pika.BlockingConnection( 23 | pika.ConnectionParameters(host=ip, port=int(port))) 24 | channel = connection.channel() 25 | 26 | channel.queue_declare(queue=queue) 27 | 28 | def callback(ch, method, properties, body): 
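# note: pika delivers the message body as raw bytes; json.loads(body) would recover the payload if parsing were needed here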
print(platform, ": ", str(body)) 30 | 31 | channel.basic_consume( 32 | queue=queue, on_message_callback=callback, auto_ack=True) 33 | 34 | print(' [*] Waiting for messages. To exit press CTRL+C') 35 | channel.start_consuming() 36 | --------------------------------------------------------------------------------