├── dagster ├── hackernews │ ├── hackernews_tests │ │ ├── __init__.py │ │ └── test_assets.py │ ├── setup.cfg │ ├── pyproject.toml │ ├── setup.py │ ├── hackernews │ │ ├── resources │ │ │ ├── trino.py │ │ │ └── spark-k8s.py │ │ ├── io │ │ │ └── arrow_dataset.py │ │ ├── __init__.py │ │ └── assets.py │ └── Dockerfile ├── workspace.yaml ├── dagit.Dockerfile ├── pyproject.toml ├── README.md └── poetry.lock ├── trino ├── tests │ └── python_client │ │ ├── requirements.txt │ │ └── python_client_integration_test.py ├── Makefile ├── etc │ └── catalog │ │ └── hive.properties └── README.md ├── delta-lake ├── deltalake-operator │ ├── project │ │ ├── build.properties │ │ └── plugins.sbt │ ├── src │ │ ├── test │ │ │ ├── resources │ │ │ │ └── testtable │ │ │ │ │ ├── part-00000-2ca8e364-cc1f-414b-a51f-ec33e75d25ad-c000.snappy.parquet │ │ │ │ │ └── _delta_log │ │ │ │ │ └── 00000000000000000000.json │ │ │ └── scala │ │ │ │ ├── util │ │ │ │ └── TestUtils.scala │ │ │ │ └── delta │ │ │ │ └── DeltaTableTest.scala │ │ └── main │ │ │ ├── scala │ │ │ ├── util │ │ │ │ └── Config.scala │ │ │ └── delta │ │ │ │ └── DeltaTable.scala │ │ │ └── resources │ │ │ └── fs.xml │ └── build.sbt └── README.md ├── .gitignore ├── resources ├── images │ ├── architecture.png │ ├── tweets-practice.png │ └── hackernews_dagster.png ├── data │ └── README.md └── practices │ └── tweetschampions │ ├── tests │ ├── tweets_trino_test.py │ ├── conftest.py │ ├── tweets_fact_test.py │ └── example.json │ ├── README.md │ ├── submit.sh │ └── pyspark_tweets_fact_ingesting.py ├── spark ├── manifests │ ├── service-accounts.yaml │ ├── services.yaml │ ├── pvc.yaml │ └── role.yaml ├── Dockerfile ├── jars-downloader.sh ├── spark-defaults.conf └── README.md ├── jupyter ├── tests │ ├── test.sh │ ├── notebook-tester.py │ └── spark-hivemetastore-test.ipynb ├── ipython_kernel_config.py ├── README.md ├── jars-downloader.sh ├── spark-defaults.conf └── Dockerfile ├── hive-metastore ├── entrypoint.sh ├── Makefile ├── Dockerfile ├── metastore-site.xml └── README.md └── README.md /dagster/hackernews/hackernews_tests/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dagster/hackernews/hackernews_tests/test_assets.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /trino/tests/python_client/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | trino -------------------------------------------------------------------------------- /dagster/hackernews/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = hackernews 3 | -------------------------------------------------------------------------------- /delta-lake/deltalake-operator/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 1.8.0 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE 2 | **/.vscode 3 | 4 | # Python 5 | **/__pycache__ 6 | 7 | # Mac 8 | **/.DS_Store -------------------------------------------------------------------------------- /resources/images/architecture.png: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leehuwuj/olh/HEAD/resources/images/architecture.png
--------------------------------------------------------------------------------
/spark/manifests/service-accounts.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | apiVersion: v1
3 | kind: ServiceAccount
4 | metadata:
5 |   name: spark
--------------------------------------------------------------------------------
/delta-lake/deltalake-operator/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("org.jetbrains.scala" % "sbt-ide-settings" % "1.1.1")
2 | 
--------------------------------------------------------------------------------
/resources/images/tweets-practice.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leehuwuj/olh/HEAD/resources/images/tweets-practice.png
--------------------------------------------------------------------------------
/resources/images/hackernews_dagster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leehuwuj/olh/HEAD/resources/images/hackernews_dagster.png
--------------------------------------------------------------------------------
/trino/Makefile:
--------------------------------------------------------------------------------
1 | run-docker:
2 | 	docker run \
3 | 	-d \
4 | 	--name trino \
5 | 	-p 8080:8080 \
6 | 	--volume $(PWD)/etc/catalog:/etc/trino/catalog \
7 | 	trinodb/trino
--------------------------------------------------------------------------------
/dagster/hackernews/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools"]
3 | build-backend = "setuptools.build_meta"
4 | 
5 | [tool.dagster]
6 | module_name = "hackernews"
--------------------------------------------------------------------------------
/jupyter/tests/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export PYTHONPATH=/usr/local/spark/python/lib/py4j-0.10.9.5-src.zip:/usr/local/spark/python
3 | /opt/conda/bin/python -m pytest /tmp/tests/notebook-tester.py
--------------------------------------------------------------------------------
/jupyter/tests/notebook-tester.py:
--------------------------------------------------------------------------------
1 | from testbook import testbook
2 | 
3 | @testbook('/tmp/tests/spark-hivemetastore-test.ipynb', execute=True)
4 | def test_get_details(tb):
5 |     nb_tables = tb.get("tables")
6 | 
7 |     assert isinstance(nb_tables, list)
--------------------------------------------------------------------------------
/delta-lake/deltalake-operator/src/test/resources/testtable/part-00000-2ca8e364-cc1f-414b-a51f-ec33e75d25ad-c000.snappy.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leehuwuj/olh/HEAD/delta-lake/deltalake-operator/src/test/resources/testtable/part-00000-2ca8e364-cc1f-414b-a51f-ec33e75d25ad-c000.snappy.parquet
--------------------------------------------------------------------------------
/trino/etc/catalog/hive.properties:
--------------------------------------------------------------------------------
1 | connector.name=delta-lake
2 | delta.register-table-procedure.enabled=true
3 | hive.metastore.uri=thrift://[CHANGE_ME]:9083
4 | hive.s3.endpoint=http://[CHANGE_ME]:9000
5 | hive.s3.path-style-access=true
6 | hive.s3.aws-access-key=[CHANGE_ME]
7 | hive.s3.aws-secret-key=[CHANGE_ME]
--------------------------------------------------------------------------------
/dagster/workspace.yaml:
--------------------------------------------------------------------------------
1 | load_from:
2 |   # Enable this if you're gonna test with docker
3 |   # - grpc_server:
4 |   #     host: host.docker.internal
5 |   #     port: 4266
6 |   #     location_name: "hackernews"
7 | 
8 |   # Enable this if you're gonna test directly on your host
9 |   - python_module: hackernews
--------------------------------------------------------------------------------
/delta-lake/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | ## Deltalake Operator
4 | - An operator for Delta Lake that uses Delta Standalone instead of Spark
5 | - [DEPRECATED]:
6 |   - Use `delta-rs` instead of Delta Standalone. delta-rs is a Delta SDK built in Rust that lets us interact with the Delta transaction log without Spark. It also has a Python binding which is easy to use.
--------------------------------------------------------------------------------
/hive-metastore/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | echo "Command: $1"
4 | 
5 | if [ "$1" = "init" ]; then
6 |     echo "Start init schema"
7 |     ${HIVE_HOME}/bin/schematool -initSchema -dbType postgres
8 | elif [ "$1" = "run" ]; then
9 |     echo "Starting metastore server"
10 |     ${HIVE_HOME}/bin/start-metastore
11 | else
12 |     echo "Missing command: run or init"
13 | fi
14 | 
--------------------------------------------------------------------------------
/spark/manifests/services.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 |   name: local-spark
5 | spec:
6 |   selector:
7 |     spark-app-selector: local-spark
8 |   ports:
9 |     - name: driver-rpc-port
10 |       protocol: TCP
11 |       port: 7078
12 |       targetPort: 7078
13 |     - name: blockmanager
14 |       protocol: TCP
15 |       port: 7079
16 |       targetPort: 7079
--------------------------------------------------------------------------------
/resources/data/README.md:
--------------------------------------------------------------------------------
1 | # Example data
2 | 1. Tweets Champions:
3 |    - Source & Download: https://www.kaggle.com/datasets/xvivancos/tweets-during-r-madrid-vs-liverpool-ucl-2018
4 |    - This data includes nested JSON about tweets on Twitter. Its structure is complex and it is large enough for our practice.
5 | - Example jobs: 6 | - [Pyspark - Tweets Fact ingestion](https://github.com/leehuwuj/olh/tree/main/resources/practices/tweetschampions) -------------------------------------------------------------------------------- /delta-lake/deltalake-operator/src/main/scala/util/Config.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import org.apache.hadoop.conf.Configuration 4 | import org.apache.hadoop.fs.Path 5 | 6 | object Config { 7 | val FS_CONFIG_PATH: String = sys.env.getOrElse("CONFIG_PATH", "src/main/resources/fs.xml") 8 | 9 | def getCommonFSConfiguration: Configuration = { 10 | val fsConf: Configuration = new Configuration() 11 | fsConf.addResource(new Path(FS_CONFIG_PATH)) 12 | fsConf 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /dagster/dagit.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-slim 2 | 3 | RUN mkdir -p /opt/dagster/dagster_home /opt/dagster/app 4 | 5 | RUN pip install dagit dagster-postgres dagster-aws 6 | 7 | # Copy your code and workspace to /opt/dagster/app 8 | COPY workspace.yaml /opt/dagster/app/ 9 | 10 | ENV DAGSTER_HOME=/opt/dagster/dagster_home/ 11 | 12 | COPY hackernews/dagster.yaml ${DAGSTER_HOME}/dagster.yaml 13 | 14 | WORKDIR /opt/dagster/app 15 | 16 | EXPOSE 3000 17 | 18 | ENTRYPOINT ["dagit", "-h", "0.0.0.0", "-p", "3000"] -------------------------------------------------------------------------------- /hive-metastore/Makefile: -------------------------------------------------------------------------------- 1 | build: 2 | docker build \ 3 | -t olh/hive-metastore \ 4 | -f Dockerfile \ 5 | . 6 | 7 | build-amd64: 8 | docker buildx \ 9 | build --platform linux/amd64 \ 10 | -t olh/hive-metastore \ 11 | -f Dockerfile \ 12 | . 13 | 14 | build-arm: 15 | docker buildx \ 16 | build --platform linux/arm64/v8 \ 17 | -t olh/hive-metastore \ 18 | -f Dockerfile \ 19 | . 20 | 21 | docker-run: 22 | docker run \ 23 | -d \ 24 | --name metastore \ 25 | -p 9083:9083 \ 26 | olh/hive-metastore -------------------------------------------------------------------------------- /spark/manifests/pvc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolume 3 | metadata: 4 | name: sparkrwx 5 | labels: 6 | type: local 7 | spec: 8 | storageClassName: manual 9 | capacity: 10 | storage: 10Gi 11 | accessModes: 12 | - ReadWriteMany 13 | hostPath: 14 | path: "" 15 | --- 16 | apiVersion: v1 17 | kind: PersistentVolumeClaim 18 | metadata: 19 | name: sparkrwx 20 | spec: 21 | storageClassName: manual 22 | volumeName: sparkrwx 23 | accessModes: 24 | - ReadWriteMany 25 | resources: 26 | requests: 27 | storage: 3Gi -------------------------------------------------------------------------------- /delta-lake/deltalake-operator/src/test/scala/util/TestUtils.scala: -------------------------------------------------------------------------------- 1 | package util 2 | import org.apache.commons.io.FileUtils 3 | 4 | import java.io.File 5 | import java.nio.file.Files 6 | import java.util.UUID 7 | 8 | object TestUtils { 9 | /** 10 | * Creates a temporary directory, which is then passed to `f` and will be deleted after `f` 11 | * returns. 
12 | */ 13 | def withTempDir(f: File => Unit): Unit = { 14 | val dir = Files.createTempDirectory(UUID.randomUUID().toString).toFile 15 | try f(dir) finally { 16 | FileUtils.deleteDirectory(dir) 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /spark/manifests/role.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: Role 4 | metadata: 5 | name: spark 6 | rules: 7 | - apiGroups: [""] 8 | resources: ["configmaps", "secrets", "services", "endpoints", "pods", "persistentvolumeclaims"] 9 | verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] 10 | --- 11 | apiVersion: rbac.authorization.k8s.io/v1 12 | kind: RoleBinding 13 | metadata: 14 | name: spark 15 | roleRef: 16 | apiGroup: "rbac.authorization.k8s.io" 17 | kind: Role 18 | name: spark 19 | subjects: 20 | - kind: ServiceAccount 21 | name: spark -------------------------------------------------------------------------------- /delta-lake/deltalake-operator/build.sbt: -------------------------------------------------------------------------------- 1 | ThisBuild / version := "0.1.0-SNAPSHOT" 2 | 3 | ThisBuild / scalaVersion := "2.13.10" 4 | 5 | lazy val deltalake_operator = (project in file(".")) 6 | .settings( 7 | organization := "git.leehuwuj.olh", 8 | name := "deltalake-operator", 9 | libraryDependencies ++= Seq( 10 | "org.scalatest" %% "scalatest" % "3.2.14" % Test, 11 | "io.delta" %% "delta-standalone" % "0.5.0", 12 | "org.apache.hadoop" % "hadoop-client" % "3.3.1", 13 | "org.apache.hadoop" % "hadoop-aws" % "3.3.1", 14 | "org.apache.hadoop" % "hadoop-common" % "3.3.1" 15 | ) 16 | ) 17 | -------------------------------------------------------------------------------- /dagster/hackernews/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | setup( 4 | name="hackernews", 5 | packages=find_packages(exclude=["hackernews_tests"]), 6 | install_requires=[ 7 | "dagster", 8 | "dagster-cloud" 9 | ], 10 | extras_require={ 11 | "dev": [ 12 | "pytest", 13 | "dagster-postgres", 14 | "dagster-aws", 15 | "dagster-k8s", 16 | "dagster-dbt", 17 | "dagster-pyspark", 18 | "pandas", 19 | "trino", 20 | "deltalake", 21 | "pyarrow" 22 | ] 23 | }, 24 | ) 25 | -------------------------------------------------------------------------------- /jupyter/ipython_kernel_config.py: -------------------------------------------------------------------------------- 1 | # Configuration file for ipython-kernel. 2 | # See 3 | 4 | # With IPython >= 6.0.0, all outputs to stdout/stderr are captured. 5 | # It is the case for subprocesses and output of compiled libraries like Spark. 6 | # Those logs now both head to notebook logs and in notebooks outputs. 7 | # Logs are particularly verbose with Spark, that is why we turn them off through this flag. 8 | # 9 | 10 | # Attempt to capture and forward low-level output, e.g. produced by Extension 11 | # libraries. 
12 | # Default: True
13 | # type:ignore
14 | c.IPKernelApp.capture_fd_output = False  # noqa: F821
--------------------------------------------------------------------------------
/spark/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM olh/spark-py:base
2 | 
3 | ENV SPARK_JARS="/opt/spark/jars/"
4 | 
5 | USER root
6 | RUN apt update && apt install -y wget
7 | 
8 | # Add additional dependencies: AWS SDK, Delta, Hadoop, Hive
9 | COPY jars-downloader.sh /tmp/jars-downloader.sh
10 | RUN chmod a+x /tmp/jars-downloader.sh && /tmp/jars-downloader.sh
11 | 
12 | # Add Spark default config
13 | COPY spark-defaults.conf ${SPARK_HOME}/conf/spark-defaults.conf
14 | 
15 | # Add hive-site.xml to point Spark at the external metastore.
16 | # This config is required, but it can be overridden when you
17 | # submit Spark to Kubernetes by mounting a ConfigMap.
18 | COPY hive-site.xml ${SPARK_HOME}/conf/hive-site.xml
19 | 
20 | # Return to the spark user
21 | USER 185
--------------------------------------------------------------------------------
/dagster/hackernews/hackernews/resources/trino.py:
--------------------------------------------------------------------------------
1 | from dagster import resource
2 | from trino.dbapi import connect
3 | 
4 | class TrinoResource:
5 |     def __init__(self, user: str, host: str, port: int, catalog: str, schema: str) -> None:
6 |         self.conn = connect(
7 |             user=user,
8 |             host=host,
9 |             port=port,
10 |             catalog=catalog,
11 |             schema=schema
12 |         )
13 | 
14 |     def query(self, sql: str):
15 |         cur = self.conn.cursor()
16 |         cur.execute(sql)
17 |         rows = cur.fetchall()
18 |         return rows
19 | 
20 | # Declare the config schema and pass the full connection config through;
21 | # TrinoResource requires every field, not only the schema.
22 | @resource(config_schema={"user": str, "host": str, "port": int, "catalog": str, "schema": str})
23 | def trino_client(context):
24 |     return TrinoResource(
25 |         user=context.resource_config["user"],
26 |         host=context.resource_config["host"],
27 |         port=context.resource_config["port"],
28 |         catalog=context.resource_config["catalog"],
29 |         schema=context.resource_config["schema"],
30 |     )
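31 | 
32 | # A minimal usage sketch (hypothetical values, assuming a Trino instance
33 | # reachable at localhost:8080): wire the resource into a job and query it.
34 | from dagster import job, op
35 | 
36 | @op(required_resource_keys={"trino"})
37 | def count_trino_nodes(context):
38 |     return context.resources.trino.query("SELECT count(1) FROM system.runtime.nodes")
39 | 
40 | @job(resource_defs={
41 |     "trino": trino_client.configured({
42 |         "user": "python",
43 |         "host": "localhost",
44 |         "port": 8080,
45 |         "catalog": "hive",
46 |         "schema": "default",
47 |     })
48 | })
49 | def trino_example_job():
50 |     count_trino_nodes()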
--------------------------------------------------------------------------------
/dagster/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "olh-dagster"
3 | version = "0.1.0"
4 | description = "POC Dagster in Open source lakehouse stack"
5 | authors = ["leehuwuj "]
6 | 
7 | [tool.poetry.dependencies]
8 | python = "^3.11"
9 | dagit = "1.6.6"
10 | dagster = "1.6.6"
11 | dagster-postgres = "0.22.6"
12 | dagster-aws = "0.22.6"
13 | dagster-k8s = "0.22.6"
14 | dagster-dbt = "^0.22.6"
15 | dagster-pyspark = "0.22.6"
16 | pandas = "^2.2.1"
17 | trino = "^0.328.0"
18 | deltalake = "^0.15.3"
19 | pyarrow = "^15.0.0"
20 | dbt-core = "^1.7.8"
21 | dbt-trino = "^1.7.1"
22 | pyspark = "^3.3.1"
23 | 
24 | [tool.poetry.dev-dependencies]
25 | 
26 | [build-system]
27 | requires = ["poetry-core>=1.0.0"]
28 | build-backend = "poetry.core.masonry.api"
--------------------------------------------------------------------------------
/dagster/hackernews/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG BASE_IMAGE
2 | FROM "${BASE_IMAGE}"
3 | 
4 | ARG DAGSTER_VERSION
5 | 
6 | # ==> Add Dagster layer
7 | RUN \
8 |     pip install \
9 |         dagster==${DAGSTER_VERSION} \
10 |         dagster-postgres \
11 |         dagster-aws \
12 |         dagster-k8s
13 | 
14 | # ==> Add user code layer
15 | # Example pipelines
16 | COPY setup.py setup.py
17 | COPY setup.cfg setup.cfg
18 | RUN pip install ".[dev]" \
19 |     && rm -rf /var \
20 |     && rm -rf /root/.cache \
21 |     && rm -rf /usr/lib/python2.7 \
22 |     && rm -rf /usr/lib/x86_64-linux-gnu/guile
23 | 
24 | ARG DAGSTER_HOME=/opt/dagster
25 | ENV DAGSTER_HOME=${DAGSTER_HOME}
26 | RUN mkdir -p ${DAGSTER_HOME}
27 | 
28 | WORKDIR ${DAGSTER_HOME}
29 | 
30 | COPY hackernews hackernews
31 | COPY dagster.yaml .
--------------------------------------------------------------------------------
/trino/tests/python_client/python_client_integration_test.py:
--------------------------------------------------------------------------------
1 | import os
2 | from trino.dbapi import connect
3 | 
4 | TRINO_HOST = os.environ.get("TRINO_HOST", "host.docker.internal")
5 | TRINO_PORT = int(os.environ.get("TRINO_PORT", 8080))
6 | TRINO_USER = os.environ.get("TRINO_USER", "python")
7 | TRINO_CATALOG = os.environ.get("TRINO_CATALOG", "default")
8 | TRINO_SCHEMA = os.environ.get("TRINO_SCHEMA", "system")
9 | 
10 | def test_trino_client():
11 |     conn = connect(
12 |         host=TRINO_HOST,
13 |         port=TRINO_PORT,
14 |         user=TRINO_USER,
15 |         catalog=TRINO_CATALOG,
16 |         schema=TRINO_SCHEMA
17 |     )
18 | 
19 |     cur = conn.cursor()
20 |     cur.execute("SELECT * FROM system.runtime.nodes")
21 |     rows = cur.fetchall()
22 |     print(rows)
23 |     assert len(rows) > 0
--------------------------------------------------------------------------------
/resources/practices/tweetschampions/tests/tweets_trino_test.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | 
3 | @pytest.mark.usefixtures("trino_client")
4 | def test_tweets_fact_table(trino_client):
5 |     cur = trino_client.cursor()
6 |     cur.execute("SELECT count(1) FROM delta.tweets.tweetsfact")
7 |     rows = cur.fetchall()
8 |     # count(1) returns a single row; check the counted value itself
9 |     assert rows[0][0] == 10
10 | 
11 | @pytest.mark.usefixtures("trino_client")
12 | def test_tweets_fact_duplication(trino_client):
13 |     cur = trino_client.cursor()
14 |     cur.execute("SELECT count(1) FROM delta.tweets.tweetsfact")
15 |     count = cur.fetchall()[0]
16 | 
17 |     cur.execute("""
18 |         SELECT count(1)
19 |         FROM (
20 |             SELECT distinct tf.*
21 |             FROM delta.tweets.tweetsfact tf
22 |         )
23 |     """)
24 | 
25 |     distinct_count = cur.fetchall()[0]
26 |     assert count == distinct_count
--------------------------------------------------------------------------------
/delta-lake/deltalake-operator/src/main/resources/fs.xml:
--------------------------------------------------------------------------------
1 | <configuration>
2 |     <property>
3 |         <name>fs.s3a.access.key</name>
4 |         <value></value>
5 |     </property>
6 |     <property>
7 |         <name>fs.s3a.secret.key</name>
8 |         <value></value>
9 |     </property>
10 |     <property>
11 |         <name>fs.s3a.endpoint</name>
12 |         <value></value>
13 |     </property>
14 |     <property>
15 |         <name>fs.s3a.path.style.access</name>
16 |         <value>true</value>
17 |     </property>
18 |     <property>
19 |         <name>fs.s3a.impl</name>
20 |         <value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
21 |     </property>
22 |     <property>
23 |         <name>fs.s3a.aws.credentials.provider</name>
24 |         <value>org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider</value>
25 |     </property>
26 | </configuration>
--------------------------------------------------------------------------------
/resources/practices/tweetschampions/README.md:
--------------------------------------------------------------------------------
1 | ## Flow
2 | ![tweets-champions-flow](../../images/tweets-practice.png)
3 | 
4 | ## Data:
5 | - [Tweets Data](https://github.com/leehuwuj/olh/blob/main/resources/data/README.md)
6 | 
7 | ## Practice steps (manually):
8 | 1. Ingest the raw JSON data into MinIO at the raw path.
9 |    - Download the data from the link above and upload it into the raw space of the MinIO lake bucket.
10 |    - You can also write a simple download-and-upload script in Python, which represents a basic data-integration step (see the sketch below).
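11 |    - A minimal sketch of such a script (hypothetical endpoint, credentials, bucket, and paths; adjust them to your MinIO setup):
12 | ```python
13 | import boto3
14 | 
15 | # MinIO speaks the S3 API, so a plain S3 client with a custom endpoint works.
16 | s3 = boto3.client(
17 |     "s3",
18 |     endpoint_url="http://localhost:9000",  # assumed local MinIO endpoint
19 |     aws_access_key_id="[CHANGE_ME]",
20 |     aws_secret_access_key="[CHANGE_ME]",
21 | )
22 | 
23 | # Push the downloaded Kaggle dump into the raw space of the lake bucket.
24 | with open("TweetsChampions.json", "rb") as f:
25 |     s3.upload_fileobj(f, "lake", "raw/tweetschampions/TweetsChampions.json")
26 | ```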
27 | 2. Submit `pyspark_tweets_fact_ingesting.py` to filter the tweet information into the `tweet_fact` table.
28 |    - Upload the PySpark script into an S3 scripts bucket.
29 |    - Edit the configuration in `submit.sh` and run it.
30 | 3. Test the table data in Trino.
31 |    - Run ad-hoc queries in Trino to check the table and the data quality of the pipeline above.
32 |    - // Todo
33 | 4. Create a Superset dashboard.
34 |    - // Todo
35 | 
36 | ## Dagster (automatically):
37 | // Todo
--------------------------------------------------------------------------------
/resources/practices/tweetschampions/submit.sh:
--------------------------------------------------------------------------------
1 | spark-submit \
2 |     --deploy-mode cluster \
3 |     --master k8s://https://kubernetes.docker.internal:6443 \
4 |     --name "TweetsFact pipeline" \
5 |     --conf spark.executor.instances=2 \
6 |     --conf spark.kubernetes.container.image=spark:base \
7 |     --conf spark.kubernetes.namespace=spark \
8 |     --conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
9 |     --conf spark.hadoop.fs.s3a.endpoint=http://kubernetes.docker.internal:9000 \
10 |     --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider \
11 |     --conf spark.hadoop.fs.s3a.access.key=[CHANGE_ME] \
12 |     --conf spark.hadoop.fs.s3a.secret.key=[CHANGE_ME] \
13 |     --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \
14 |     --conf spark.hadoop.fs.s3a.fast.upload=true \
15 |     --conf spark.kubernetes.driverEnv.HIVE_METASTORE_URIS=thrift://kubernetes.docker.internal:9083 \
16 |     s3a://[CHANGE_ME].py
--------------------------------------------------------------------------------
/resources/practices/tweetschampions/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | 
3 | 
4 | @pytest.fixture(scope="session")
5 | def spark_local_session():
6 |     from pyspark.sql import SparkSession
7 | 
8 |     spark = (
9 |         SparkSession
10 |         .builder
11 |         .master("local[2]")
12 |         .appName("pyspark tester")
13 |         .getOrCreate()
14 |     )
15 |     return spark
16 | 
17 | @pytest.fixture(scope="session")
18 | def trino_client():
19 |     import os
20 |     from trino.dbapi import connect
21 | 
22 |     TRINO_HOST = os.environ.get("TRINO_HOST", "host.docker.internal")
23 |     TRINO_PORT = int(os.environ.get("TRINO_PORT", 8080))
24 |     TRINO_USER = os.environ.get("TRINO_USER", "python")
25 |     TRINO_CATALOG = os.environ.get("TRINO_CATALOG", "default")
26 |     TRINO_SCHEMA = os.environ.get("TRINO_SCHEMA", "tweets")
27 | 
28 |     conn = connect(
29 |         host=TRINO_HOST,
30 |         port=TRINO_PORT,
31 |         user=TRINO_USER,
32 |         catalog=TRINO_CATALOG,
33 |         schema=TRINO_SCHEMA
34 |     )
35 |     return conn
--------------------------------------------------------------------------------
/jupyter/README.md:
--------------------------------------------------------------------------------
1 | # Jupyter
2 | - Source: https://github.com/jupyter/docker-stacks/tree/main/pyspark-notebook
3 | 
4 | ## Docker setup
5 | 1. Build the Jupyter Spark image:
6 |    - This Dockerfile is derived from the source above.
7 |    - It adds extra dependencies for metastore, Delta, and S3 integration.
8 | ```shell
9 | docker build -t olh/jupyter:base -f Dockerfile .
10 | ```
11 | 
12 | 2. Run the integration test against the Hive metastore:
13 |    - Start your Hive metastore instance and update the URI in the test file if needed.
14 |    - Run the test in a container:
15 | ```shell
16 | docker run --rm -p 10000:8888 olh/jupyter:base /tmp/tests/test.sh
17 | ```
18 | 
19 | Output:
20 | ```
21 | ============================= test session starts ==============================
22 | platform linux -- Python 3.10.8, pytest-7.2.0, pluggy-1.0.0
23 | rootdir: /tmp/tests
24 | plugins: anyio-3.6.2
25 | collected 1 item
26 | 
27 | ../../tmp/tests/notebook-tester.py .
28 | ========================= 1 passed, 1 warning in 7.89s =========================
29 | ```
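30 | 
31 | Until the lab-server section below is filled in, a minimal way to start the notebook server from this image (assuming the upstream docker-stacks entrypoint is unchanged) is:
32 | ```shell
33 | docker run -d -p 8888:8888 --name jupyter olh/jupyter:base
34 | # the login token is printed in the container logs:
35 | docker logs jupyter
36 | ```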
37 | 
38 | 
39 | 
40 | //Todo: Jupyter lab server
41 | //Todo: Jupyter - Spark kubernetes integration
42 | 
--------------------------------------------------------------------------------
/jupyter/jars-downloader.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | declare -a JARS=(
4 |     https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.319/aws-java-sdk-bundle-1.12.319.jar \
5 |     https://repo1.maven.org/maven2/io/delta/delta-core_2.12/2.1.1/delta-core_2.12-2.1.1.jar \
6 |     https://repo1.maven.org/maven2/io/delta/delta-storage/2.1.1/delta-storage-2.1.1.jar \
7 |     https://repo1.maven.org/maven2/com/google/inject/guice/4.0/guice-4.0.jar \
8 |     https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.2.2/hadoop-aws-3.2.2.jar \
9 |     https://repo1.maven.org/maven2/org/apache/hive/hive-common/3.1.3/hive-common-3.1.3.jar \
10 |     https://repo1.maven.org/maven2/org/apache/hive/hive-exec/3.1.3/hive-exec-3.1.3.jar \
11 |     https://repo1.maven.org/maven2/org/apache/hive/hive-metastore/3.1.3/hive-metastore-3.1.3.jar \
12 |     https://repo1.maven.org/maven2/org/apache/hive/hive-serde/3.1.3/hive-serde-3.1.3.jar
13 | )
14 | 
15 | if [[ -z "${SPARK_JARS}" ]];
16 | then
17 |     echo "SPARK_JARS environment variable is not set!"
18 |     exit 1
19 | fi
20 | 
21 | for jar in "${JARS[@]}"
22 | do
23 |     wget $jar -P $SPARK_JARS
24 | done
--------------------------------------------------------------------------------
/spark/jars-downloader.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | declare -a JARS=(
4 |     https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.319/aws-java-sdk-bundle-1.12.319.jar \
5 |     https://repo1.maven.org/maven2/io/delta/delta-core_2.12/2.1.1/delta-core_2.12-2.1.1.jar \
6 |     https://repo1.maven.org/maven2/io/delta/delta-storage/2.1.1/delta-storage-2.1.1.jar \
7 |     https://repo1.maven.org/maven2/com/google/inject/guice/4.0/guice-4.0.jar \
8 |     https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.2.2/hadoop-aws-3.2.2.jar \
9 |     https://repo1.maven.org/maven2/org/apache/hive/hive-common/3.1.3/hive-common-3.1.3.jar \
10 |     https://repo1.maven.org/maven2/org/apache/hive/hive-exec/3.1.3/hive-exec-3.1.3.jar \
11 |     https://repo1.maven.org/maven2/org/apache/hive/hive-metastore/3.1.3/hive-metastore-3.1.3.jar \
12 |     https://repo1.maven.org/maven2/org/apache/hive/hive-serde/3.1.3/hive-serde-3.1.3.jar
13 | )
14 | 
15 | if [[ -z "${SPARK_JARS}" ]];
16 | then
17 |     echo "SPARK_JARS environment variable is not set!"
18 | exit 1 19 | fi 20 | 21 | for jar in "${JARS[@]}" 22 | do 23 | wget $jar -P $SPARK_JARS 24 | done 25 | -------------------------------------------------------------------------------- /delta-lake/deltalake-operator/src/test/scala/delta/DeltaTableTest.scala: -------------------------------------------------------------------------------- 1 | package delta 2 | import org.scalatest.funsuite.AnyFunSuite 3 | import util.TestUtils.withTempDir 4 | 5 | class DeltaTableTest extends AnyFunSuite { 6 | test("tmp table data zero partition count") { 7 | withTempDir(dir => { 8 | val table = new DeltaTable(dir.getCanonicalPath, Seq()) 9 | val fileNum = table.getPartitionFileCount(Map(("lang", "en"))) 10 | assert(fileNum == 0) 11 | }) 12 | } 13 | 14 | test("real table data have right number of files in a partition") { 15 | val table: DeltaTable = new DeltaTable( 16 | "src/test/resources/testtable", 17 | Seq() 18 | ) 19 | val fileNum = table.getPartitionFileCount(Map()) 20 | assert(fileNum == 1) 21 | } 22 | 23 | test("test minio data with Delta scan") { 24 | val table: DeltaTable = new DeltaTable( 25 | "s3a://lake/warehouse/tweetsfact", 26 | Seq() 27 | ) 28 | val fileNum1 = table.getPartitionFileCount(Map("lang" -> "en")) 29 | val fileNum2 = table.betterGetPartitionFileCount(Map("lang" -> "en")) 30 | assert(fileNum1 == fileNum2) 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /dagster/hackernews/hackernews/resources/spark-k8s.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | from dagster_spark.configs_spark import spark_config 4 | from dagster_spark.utils import flatten_dict 5 | from pyspark.sql import SparkSession 6 | 7 | import dagster._check as check 8 | from dagster import resource 9 | 10 | 11 | def spark_session_from_config(spark_conf=None): 12 | spark_conf = check.opt_dict_param(spark_conf, "spark_conf") 13 | builder = SparkSession.builder 14 | flat = flatten_dict(spark_conf) 15 | for key, value in flat: 16 | builder = builder.config(key, value) 17 | 18 | return builder.getOrCreate() 19 | 20 | 21 | class PySparkResource: 22 | def __init__(self, spark_conf): 23 | self._spark_session = spark_session_from_config(spark_conf) 24 | 25 | @property 26 | def spark_session(self): 27 | return self._spark_session 28 | 29 | @property 30 | def spark_context(self): 31 | return self.spark_session.sparkContext 32 | 33 | 34 | @resource({"spark_conf": spark_config()}) 35 | def pyspark_resource(init_context): 36 | return PySparkResource(init_context.resource_config["spark_conf"]) -------------------------------------------------------------------------------- /resources/practices/tweetschampions/tests/tweets_fact_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pyspark.sql.types import ( 4 | StructField, 5 | StructType, 6 | TimestampType, 7 | StringType, 8 | LongType 9 | ) 10 | 11 | from pyspark.sql import SparkSession 12 | from pyspark_tweets_fact_ingesting import filter_tweets_fact_table 13 | 14 | @pytest.mark.usefixtures("spark_local_session") 15 | def test_filter_tweets_fact_table(spark_local_session: SparkSession): 16 | expected_schema = StructType([ 17 | StructField('timestamp_ms', TimestampType(), True), 18 | StructField('id', LongType(), True), 19 | StructField('text', StringType(), True), 20 | StructField('source', StringType(), True), 21 | StructField('user_id', LongType(), True), 22 | 
StructField('lang', StringType(), True),
23 |         StructField('quote_count', LongType(), True),
24 |         StructField('reply_count', LongType(), True),
25 |         StructField('retweet_count', LongType(), True)]
26 |     )
27 | 
28 |     data = spark_local_session.read.json("tests/example.json")
29 |     output_df = filter_tweets_fact_table(data)
30 | 
31 |     assert output_df.schema == expected_schema
32 |     assert output_df.count() == 10
--------------------------------------------------------------------------------
/dagster/README.md:
--------------------------------------------------------------------------------
1 | ## Prerequisite:
2 | At this point, we combine all of the previous steps into a Dagster project that defines a workflow for our stack. Please make sure you have already set up these tools:
3 | - Spark k8s cluster
4 | - Trino cluster
5 | - DBT
6 | 
7 | ## Dagster setup:
8 | A Dagster project can be deployed flexibly into different environments, so we should organize our project code around environment variables that can easily be changed per deployment.
9 | ### Development (local deploy):
10 | 1. Install the Python packages with Poetry:
11 | ```
12 | poetry install
13 | ```
14 | 
15 | 2. Check & update the environment variables in the `__init__.py` of the hackernews project.
16 | ```
17 | S3_ENDPOINT = os.environ.get("AWS_S3_ENDPOINT_URL")
18 | AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
19 | AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
20 | 
21 | SPARK_CLUSTER = os.environ.get("SPARK_CLUSTER", "k8s://https://host.docker.internal:8443")
22 | SPARK_METASTORE = os.environ.get("SPARK_METASTORE", "thrift://host.docker.internal:9083")
23 | # Set this to your Dagster host IP, one that the k8s cluster network can reach
24 | SPARK_DRIVER_HOST = os.environ.get("SPARK_DRIVER_HOST")
25 | 
26 | DBT_HOME = os.environ.get("DBT_HOME")
27 | 
28 | DBT_PROJECT_PATH = os.path.join(DBT_HOME, "olh/dbt/hackernews")
29 | DBT_PROFILES = os.path.join(DBT_HOME, "olh/dbt/hackernews")
30 | ```
31 | 
32 | 3. Start the **dagit** UI:
33 | ```
34 | poetry run dagit
35 | ```
36 | 
37 | 4.
Trigger hackernews assets: 38 | ![hackernews-dagster](../resources/images/hackernews_dagster.png) 39 | -------------------------------------------------------------------------------- /jupyter/tests/spark-hivemetastore-test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pyspark.sql import SparkSession" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "spark = (SparkSession\n", 19 | " .builder\n", 20 | " .config(\"spark.sql.extensions\", \"io.delta.sql.DeltaSparkSessionExtension\")\n", 21 | " .config(\"spark.sql.catalog.spark_catalog\", \"org.apache.spark.sql.delta.catalog.DeltaCatalog\")\n", 22 | " .config(\"spark.hadoop.fs.s3a.path.style.access\", \"True\")\n", 23 | " .config(\"spark.hadoop.fs.s3a.impl\", \"org.apache.hadoop.fs.s3a.S3AFileSystem\")\n", 24 | " .config(\"spark.hadoop.fs.s3a.fast.upload\", \"true\")\n", 25 | " .config(\"spark.sql.warehouse.dir\", \"/tmp/warehouse\")\n", 26 | " .config(\"hive.metastore.uris\", \"thrift://host.docker.internal:9083\")\n", 27 | " .enableHiveSupport()\n", 28 | " .getOrCreate())" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "tables = spark.sql(\"show tables\").collect()" 38 | ] 39 | } 40 | ], 41 | "metadata": { 42 | "kernelspec": { 43 | "display_name": "Python 3.9.12 ('base')", 44 | "language": "python", 45 | "name": "python3" 46 | }, 47 | "language_info": { 48 | "name": "python", 49 | "version": "3.9.12" 50 | }, 51 | "orig_nbformat": 4, 52 | "vscode": { 53 | "interpreter": { 54 | "hash": "3d597f4c481aa0f25dceb95d2a0067e73c0966dcbd003d741d821a7208527ecf" 55 | } 56 | } 57 | }, 58 | "nbformat": 4, 59 | "nbformat_minor": 2 60 | } 61 | -------------------------------------------------------------------------------- /hive-metastore/Dockerfile: -------------------------------------------------------------------------------- 1 | # Hive 3 standalone metastore is built from Java 8 2 | # We inherited image from the base Java 8 image of Azul zulu which is compatible for ARM users 3 | FROM azul/zulu-openjdk:8 4 | 5 | WORKDIR /opt 6 | 7 | ENV HADOOP_VERSION=3.1.3 8 | ENV METASTORE_VERSION=3.0.0 9 | 10 | ENV HADOOP_HOME=/opt/hadoop-${HADOOP_VERSION} 11 | ENV HIVE_HOME=/opt/apache-hive-metastore-${METASTORE_VERSION}-bin 12 | ENV HADOOP_CLASSPATH=${HADOOP_HOME}/share/hadoop/tools/lib/*.jar:${HIVE_HOME}/lib/*.jar 13 | 14 | 15 | RUN apt update && apt install -y curl 16 | 17 | # Download and extract hadoop library to HADOOP_HOME 18 | RUN curl -L https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz | tar zxf - 19 | 20 | # Download and extract hive standalone metastore 21 | RUN curl -L https://repo1.maven.org/maven2/org/apache/hive/hive-standalone-metastore/${METASTORE_VERSION}/hive-standalone-metastore-${METASTORE_VERSION}-bin.tar.gz | tar zxf - 22 | COPY metastore-site.xml ${HIVE_HOME}/conf 23 | COPY entrypoint.sh /entrypoint.sh 24 | RUN groupadd -r hive --gid=1000 && \ 25 | useradd -r -g hive --uid=1000 -d ${HIVE_HOME} hive && \ 26 | chown hive:hive -R ${HIVE_HOME} && \ 27 | chown hive:hive /entrypoint.sh && chmod +x /entrypoint.sh 28 | 29 | # Refresh some libraries 30 | # Download PostgresJDBC driver for hive 31 | RUN curl 
https://jdbc.postgresql.org/download/postgresql-42.5.0.jar > ${HIVE_HOME}/lib/postgresql-42.5.0.jar
32 | RUN rm -rf ${HIVE_HOME}/lib/guava-*.jar && \
33 |     curl https://repo1.maven.org/maven2/com/google/guava/guava/31.1-jre/guava-31.1-jre.jar > ${HIVE_HOME}/lib/guava-31.1-jre.jar
34 | RUN rm -rf ${HIVE_HOME}/lib/httpcore-*.jar && \
35 |     curl https://repo1.maven.org/maven2/org/apache/httpcomponents/httpcore/4.4.15/httpcore-4.4.15.jar > ${HIVE_HOME}/lib/httpcore-4.4.15.jar
36 | 
37 | 
38 | USER hive
39 | EXPOSE 9083
40 | 
41 | ENTRYPOINT ["/entrypoint.sh"]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | **[WIP]**
2 | # Open source stack for lakehouse
3 | This project is a POC of a simple lakehouse architecture, and it aims at:
4 | - Learning: If you are a student or a beginner who works with data every day, this project can help you understand the tools you are working with.
5 | - Standing in for the cloud in testing: Nowadays cloud services are easy to plug and play, but there is a wide variety of tools, and each has its own advantages and disadvantages to be aware of. Most of them are built on top of open source stacks, so this project is essentially a cloud at your home!
6 | 
7 | There is no fixed deployment model; each service revolves around a cloud-native (containerized) application, so you can easily integrate and test it with your current platform.
8 | 
9 | *Note*:
10 | - The deployment is for testing purposes only. The project scope does not cover the security features of a lakehouse (data/table/row access control) or resource management.
11 | - If your machine does not have enough resources, just try the Docker or single-service deployment instead.
12 | 
13 | # Architecture
14 | ![high-level-architecture](resources/images/architecture.png)
15 | 
16 | # Setup:
17 | ## Hive metastore:
18 | - [Hive metastore quick setup](https://github.com/leehuwuj/olh/blob/main/hive-metastore)
19 | ## Trino
20 | - [Trino quick setup](https://github.com/leehuwuj/olh/blob/main/trino)
21 | ## Spark
22 | - [Spark simple setup for Kubernetes](https://github.com/leehuwuj/olh/blob/main/spark)
23 | ## Jupyter
24 | - [Jupyter spark docker setup](https://github.com/leehuwuj/olh/blob/main/jupyter)
25 | ## Dagster
26 | - [Dagster hackernews example project](https://github.com/leehuwuj/olh/blob/main/dagster)
27 | 
28 | # Practices
29 | ## Tweets Champions
30 | - [Tweets Data](https://github.com/leehuwuj/olh/blob/main/resources/data/README.md)
31 | - Examples:
32 |   - [Pyspark - Tweets Fact ingestion](https://github.com/leehuwuj/olh/tree/main/resources/practices/tweetschampions)
33 | 
34 | ## Dagster example project
35 | An example of using Dagster to orchestrate a data workflow: [Arrow -> (PyDelta + Trino or PySpark Delta) -> DBT]
36 | - [Dagster hackernews](https://github.com/leehuwuj/olh/tree/main/dagster)
--------------------------------------------------------------------------------
/jupyter/spark-defaults.conf:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed to the Apache Software Foundation (ASF) under one or more
3 | # contributor license agreements.  See the NOTICE file distributed with
4 | # this work for additional information regarding copyright ownership.
5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Default system properties included when running spark-submit. 19 | # This is useful for setting default environmental settings. 20 | 21 | # Example: 22 | # spark.master spark://master:7077 23 | # spark.eventLog.enabled true 24 | # spark.eventLog.dir hdfs://namenode:8021/directory 25 | # spark.serializer org.apache.spark.serializer.KryoSerializer 26 | # spark.driver.memory 5g 27 | # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" 28 | 29 | # Delta integration 30 | spark.sql.extensions io.delta.sql.DeltaSparkSessionExtension 31 | spark.sql.catalog.spark_catalog org.apache.spark.sql.delta.catalog.DeltaCatalog 32 | 33 | # S3 MinIO integration 34 | spark.hadoop.fs.s3a.path.style.access true 35 | spark.hadoop.fs.s3a.impl org.apache.hadoop.fs.s3a.S3AFileSystem 36 | spark.hadoop.fs.s3a.fast.upload true 37 | spark.hadoop.fs.s3a.aws.credentials.provider org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider 38 | 39 | # Kubernetes integration 40 | spark.kubernetes.authenticate.driver.serviceAccountName spark 41 | spark.kubernetes.namespace spark 42 | spark.kubernetes.container.image olh/spark:delta -------------------------------------------------------------------------------- /spark/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Default system properties included when running spark-submit. 19 | # This is useful for setting default environmental settings. 
20 | 
21 | # Example:
22 | # spark.master                     spark://master:7077
23 | # spark.eventLog.enabled           true
24 | # spark.eventLog.dir               hdfs://namenode:8021/directory
25 | # spark.serializer                 org.apache.spark.serializer.KryoSerializer
26 | # spark.driver.memory              5g
27 | # spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
28 | 
29 | # Delta integration
30 | spark.sql.extensions io.delta.sql.DeltaSparkSessionExtension
31 | spark.sql.catalog.spark_catalog org.apache.spark.sql.delta.catalog.DeltaCatalog
32 | 
33 | # S3 MinIO integration
34 | spark.hadoop.fs.s3a.path.style.access true
35 | spark.hadoop.fs.s3a.impl org.apache.hadoop.fs.s3a.S3AFileSystem
36 | spark.hadoop.fs.s3a.fast.upload true
37 | spark.hadoop.fs.s3a.aws.credentials.provider org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider
38 | 
39 | # Kubernetes integration
40 | spark.kubernetes.authenticate.driver.serviceAccountName spark
41 | spark.kubernetes.namespace spark
42 | spark.kubernetes.container.image olh/spark:delta
--------------------------------------------------------------------------------
/hive-metastore/metastore-site.xml:
--------------------------------------------------------------------------------
1 | <configuration>
2 |     <property>
3 |         <name>metastore.thrift.uris</name>
4 |         <value>thrift://0.0.0.0:9083</value>
5 |         <description>Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore.</description>
6 |     </property>
7 |     <property>
8 |         <name>metastore.task.threads.always</name>
9 |         <value>org.apache.hadoop.hive.metastore.events.EventCleanerTask,org.apache.hadoop.hive.metastore.RuntimeStatsCleanerTask</value>
10 |         <description>Comma separated list of tasks that will be started in separate threads. These will always be started, regardless of whether the metastore is running in embedded mode or in server mode. They must implement org.apache.hadoop.hive.metastore.MetastoreTaskThread</description>
11 |     </property>
12 |     <property>
13 |         <name>metastore.expression.proxy</name>
14 |         <value>org.apache.hadoop.hive.metastore.DefaultPartitionExpressionProxy</value>
15 |     </property>
16 |     <property>
17 |         <name>metastore.warehouse.dir</name>
18 |         <value>[CHANGEME]</value>
19 |     </property>
20 |     <property>
21 |         <name>javax.jdo.option.ConnectionDriverName</name>
22 |         <value>org.postgresql.Driver</value>
23 |     </property>
24 |     <property>
25 |         <name>javax.jdo.option.ConnectionURL</name>
26 |         <value>jdbc:postgresql://[CHANGEME]/[CHANGEME]</value>
27 |     </property>
28 |     <property>
29 |         <name>javax.jdo.option.ConnectionUserName</name>
30 |         <value>[CHANGEME]</value>
31 |     </property>
32 |     <property>
33 |         <name>javax.jdo.option.ConnectionPassword</name>
34 |         <value>[CHANGEME]</value>
35 |     </property>
36 |     <property>
37 |         <name>fs.s3a.access.key</name>
38 |         <value>[CHANGEME]</value>
39 |     </property>
40 |     <property>
41 |         <name>fs.s3a.secret.key</name>
42 |         <value>[CHANGEME]</value>
43 |     </property>
44 |     <property>
45 |         <name>fs.s3a.endpoint</name>
46 |         <value>[CHANGEME]</value>
47 |     </property>
48 |     <property>
49 |         <name>fs.s3a.path.style.access</name>
50 |         <value>true</value>
51 |     </property>
52 | </configuration>
--------------------------------------------------------------------------------
/hive-metastore/README.md:
--------------------------------------------------------------------------------
1 | # Hive standalone metastore
2 | Since Hive 3, the metastore can be deployed as a standalone service. Because we use Trino (formerly PrestoSQL) as the default query engine, the Hive query engine does not need to be included.
3 | 
4 | The Hive metastore uses an RDBMS to persist its metadata. By default it supports MySQL, but other databases work as well; we will use a Postgres DB instead.
5 | 
6 | # Prerequisite
7 | - Postgres: A Postgres database to store the Hive metadata, along with an account that can access it
8 | - MinIO Bucket: Specifies the warehouse path for managed data. It is optional during the initial setup. Please make sure Hive's S3 credentials can also access any external bucket/path.
9 | 
10 | # Setup
11 | ## Docker
12 | ### 1. Update Hive metastore configuration
13 | Look at the [metastore-site.xml](https://github.com/leehuwuj/olh/blob/main/hive-metastore/metastore-site.xml) file and replace every **[CHANGEME]** with your instance information.
14 | 
15 | **Trick**: If you are testing on your local machine, set Docker-related endpoints to `host.docker.internal` plus the target service port so the container can reach the services easily. Examples:
16 | - MinIO endpoint: `http://host.docker.internal:9000`
17 | - Postgres endpoint: `jdbc:postgresql://host.docker.internal:5432/[DB]`
18 | 
19 | The `metastore-site.xml` file is located at `${HIVE_HOME}/conf`. Depending on your deployment, it can be mounted or replaced for security purposes!
20 | 
21 | ### 2. Build the image
22 | From my testing, Hive only supports Java 8. The Dockerfile is based on the Azul Java 8 image, which runs on my ARM machine (Apple silicon M1 Pro) and works just as well on Intel/AMD.
23 | 
24 | Run this command to build the Docker image:
25 | ```shell
26 | docker build \
27 |   -t olh/hive-metastore \
28 |   -f Dockerfile \
29 |   .
30 | ```
31 | or simply `make` it with:
32 | ```shell
33 | make build
34 | ```
35 | 
36 | ### 3. Init the database schema:
37 | Run a Hive container to initialize the schema on Postgres:
38 | ```shell
39 | docker run \
40 |   --name metastore-init-db \
41 |   olh/hive-metastore \
42 |   init
43 | ```
44 | 
45 | ### 4. Start the Hive metastore container:
46 | ```shell
47 | docker run \
48 |   --name metastore \
49 |   -p 9083:9083 \
50 |   olh/hive-metastore \
51 |   run
52 | ```
--------------------------------------------------------------------------------
/trino/README.md:
--------------------------------------------------------------------------------
1 | # Trino query engine
2 | 
3 | # Prerequisite
4 | - [Hive metastore](https://github.com/leehuwuj/olh/blob/main/hive-metastore)
5 | - MinIO Bucket: Specifies the warehouse path for managed data. It is optional during the initial setup. Please make sure Trino's S3 credentials can access the external bucket/path as well.
6 | 
7 | # Setup
8 | ## Docker
9 | ### 1. Update the Trino catalog config for the Hive metastore
10 | Look at the [hive.properties](https://github.com/leehuwuj/olh/blob/main/trino/etc/catalog/hive.properties) file and replace every **[CHANGE_ME]** with your instance information.
11 | 
12 | **Tricks**:
13 | - If you are testing on your local machine, set Docker-related endpoints to `host.docker.internal` plus the target service port so the container can reach the services easily. Examples:
14 |     - MinIO endpoint: `http://host.docker.internal:9000`
15 |     - Hive thrift server endpoint: `thrift://host.docker.internal:9083`
16 | 
17 | - You can point a config value at an environment variable with this syntax: `${ENV:VARIABLE_NAME}`. Example:
18 | ```hive.s3.aws-secret-key=${ENV:AWS_SECRET}```
19 | 
20 | ### 2. Pull the image
21 | We will reuse the official Trino Docker image.
22 | ```
23 | docker pull trinodb/trino
24 | ```
25 | 
26 | ### 3. Start Trino in a Docker container:
27 | - Mount only the catalog config above:
28 | ```shell
29 | docker run \
30 |   -d \
31 |   --name trino \
32 |   -p 8080:8080 \
33 |   --volume $PWD/etc/catalog:/etc/trino/catalog \
34 |   trinodb/trino
35 | ```
36 | 
37 | # Access
38 | Trino supports many kinds of authentication (password, OAuth2, Kerberos, ...), but we will not cover security in this project's scope.
39 | By default, you can access all Trino resources.
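40 | 
41 | ## CLI:
42 | The `trinodb/trino` image ships with the Trino CLI, so (assuming the container started above is named `trino`) a quick way to verify access is:
43 | ```shell
44 | docker exec -it trino trino --execute "SELECT * FROM system.runtime.nodes"
45 | ```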
46 | 
47 | ## JDBC:
48 | - As with other JDBC drivers, you have to download the Trino JDBC driver, add it to your client application, and use a Trino connection URI.
49 | - JDBC driver download: https://repo1.maven.org/maven2/io/trino/trino-jdbc/
50 | - Connection URI:
51 | ```
52 | jdbc:trino://[USER]:[PASSWORD]@[HOST]:[PORT]/[CATALOG]/[SCHEMA]
53 | ```
54 | *Trick*: DBeaver is a database tool that supports many kinds of databases through JDBC. You can easily connect to Trino via DBeaver on your local machine.
55 | 
56 | ## Python client:
57 | - Github source code: https://github.com/trinodb/trino-python-client
--------------------------------------------------------------------------------
/delta-lake/deltalake-operator/src/main/scala/delta/DeltaTable.scala:
--------------------------------------------------------------------------------
1 | package delta
2 | import io.delta.standalone.expressions.{And, EqualTo, Expression, Literal}
3 | import util.Config
4 | import io.delta.standalone.{DeltaLog, DeltaScan, Snapshot}
5 | import io.delta.standalone.types.StructType
6 | import org.apache.hadoop.conf.Configuration
7 | import scala.collection.convert.ImplicitConversions.`map AsScala`
8 | 
9 | class DeltaTable(tablePath: String, partitions: Seq[String]) {
10 |   private val config: Configuration = Config.getCommonFSConfiguration
11 |   private val deltaLog: DeltaLog = DeltaLog.forTable(config, tablePath)
12 |   private var snapshot: Snapshot = deltaLog.update()
13 | 
14 |   private def fetchLatestSnapshot(): Unit = {
15 |     snapshot = deltaLog.update()
16 |   }
17 | 
18 |   private def getLatestSchema: StructType = {
19 |     fetchLatestSnapshot()
20 |     snapshot.getMetadata.getSchema
21 |   }
22 | 
23 |   def getLatestVersion: Long = {
24 |     fetchLatestSnapshot()
25 |     snapshot.getVersion
26 |   }
27 | 
28 |   def getPartitionFileCount(partitions: Map[String, String]): Int = {
29 |     // An empty filter map matches every file, so this also counts the
30 |     // files of an unpartitioned table.
31 |     fetchLatestSnapshot()
32 |     var partitionFileCounter = 0
33 |     snapshot.getAllFiles.forEach(file => {
34 |       var filePartitions = file.getPartitionValues.toMap
35 |       val diff = filePartitions.keys.toSet.diff(partitions.keySet)
36 |       filePartitions = filePartitions.removedAll(diff)
37 |       if (filePartitions == partitions) {
38 |         partitionFileCounter += 1
39 |       }
40 |     })
41 |     partitionFileCounter
42 |   }
43 | 
44 |   def betterGetPartitionFileCount(partitions: Map[String, String]): Int = {
45 |     val schema: StructType = getLatestSchema
46 |     // Generate scan expression
47 |     val partitionExps: List[Expression] =
48 |       partitions.keys
49 |         .map(key => new EqualTo(schema.column(key), Literal.of(partitions.get(key).orNull)))
50 |         .toList
51 |         .filter(exp => exp != null)
52 |     if (partitionExps.nonEmpty) {
53 |       val scan: DeltaScan = deltaLog
54 |         .startTransaction
55 |         .markFilesAsRead(partitionExps.reduce(new And(_, _)))
56 |       var fileCounter: Int = 0
57 |       val iter = scan.getFiles
58 |       while (iter.hasNext) {
59 |         fileCounter += 1
60 |         iter.next()
61 |       }
62 |       fileCounter
63 |     } else {
64 |       0
65 |     }
66 |   }
67 | }
--------------------------------------------------------------------------------
/delta-lake/deltalake-operator/src/test/resources/testtable/_delta_log/00000000000000000000.json:
--------------------------------------------------------------------------------
1 | {"protocol":{"minReaderVersion":1,"minWriterVersion":2}}
2 | 
{"metaData":{"id":"251768af-cd0c-4aed-ad83-8030ac22959e","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"_corrupt_record\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"meta\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"producer\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"timestamp\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"method\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"session_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"status\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"url\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"uuid\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1668398466921}} 3 | {"add":{"path":"part-00000-80cd0b0f-8390-4766-bcb8-d38cff354b3e-c000.snappy.parquet","partitionValues":{},"size":4605751,"modificationTime":1668398471000,"dataChange":true,"stats":"{\"numRecords\":100001,\"minValues\":{\"_corrupt_record\":\"\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\",\"meta\":{\"producer\":{\"timestamp\":\"2021-03-24T15:06:17.321893+00:00\"}},\"method\":\"DELETE\",\"session_id\":\"0015924a-78ef-4105-8783-cc8dc6a4\",\"status\":200,\"url\":\"http://www.163.com\",\"uuid\":\"00009a75-8cb3-4da4-9663-233045fe\"},\"maxValues\":{\"_corrupt_record\":\"web_requests-100K.json\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000�\",\"meta\":{\"producer\":{\"timestamp\":\"2021-03-24T15:06:21.199282+00:00\"}},\"method\":\"PUT\",\"session_id\":\"fff81625-d7e1-433e-8490-579bc3e9�\",\"status\":500,\"url\":\"http://www.zhihu.com\",\"uuid\":\"fffdf38b-1a91-491d-a810-4ddd73bb�\"},\"nullCount\":{\"_corrupt_record\":99999,\"meta\":{\"producer\":{\"timestamp\":2}},\"method\":2,\"session_id\":2,\"status\":2,\"url\":2,\"uuid\":2}}"}} 4 | {"commitInfo":{"timestamp":1668398471605,"operation":"CREATE TABLE AS SELECT","operationParameters":{"isManaged":"true","description":null,"partitionBy":"[]","properties":"{}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"100001","numOutputBytes":"4605751"},"engineInfo":"Apache-Spark/3.3.1 Delta-Lake/2.1.1","txnId":"c966882b-de35-4a07-972b-afaca77a043d"}} 5 | -------------------------------------------------------------------------------- /resources/practices/tweetschampions/pyspark_tweets_fact_ingesting.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from typing import Any 4 | from pyspark.sql.dataframe import DataFrame 5 | 6 | import pyspark.sql.functions as F 7 | from pyspark.sql import SparkSession 8 | 9 | HIVE_METASTORE_URIS = os.environ.get( 10 | "HIVE_METASTORE_URIS", 11 | "thrift://localhost:9083" 12 | ) 13 | TWEETS_DATA_PATH = os.environ.get( 14 | "TWEETS_DATA_PATH", 15 | "[CHANGE_ME]/TweetsChampions.json" 16 | ) 17 | TWEETS_FACT_TABLE_NAME = os.environ.get( 18 | "TWEETS_FACT_TABLE_NAME", 19 | "tweets.tweetsfact" 20 | ) 21 | 22 | def init_spark() -> SparkSession: 23 | """Init a new Spark session which supports Deltalake""" 24 | return ( 25 | SparkSession 26 | .builder 27 | 
.config("hive.metastore.uris", HIVE_METASTORE_URIS) 28 | .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") 29 | .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") 30 | .config("spark.hadoop.fs.s3a.path.style.access", "True") 31 | .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") 32 | .enableHiveSupport() 33 | .getOrCreate() 34 | ) 35 | 36 | def read_json_data(spark: SparkSession, s3_path: str) -> DataFrame: 37 | return spark.read.json(s3_path) 38 | 39 | def write_delta_table( 40 | data: DataFrame, 41 | table: str, 42 | mode: str = "append", 43 | partitions: list = [], 44 | options: dict = {}) -> Any: 45 | ( 46 | data 47 | .write 48 | .format("delta") 49 | .partitionBy(partitions) 50 | .options(**options) 51 | .mode(mode) 52 | .saveAsTable(table) 53 | ) 54 | 55 | def filter_tweets_fact_table(tweets_df: DataFrame) -> DataFrame: 56 | return tweets_df.select( 57 | # Convert timestamp_ms from unix string into timestamp type 58 | F.to_timestamp((F.col('timestamp_ms')/1000)).alias('timestamp_ms'), 59 | 'id', 60 | 'text', 61 | 'source', 62 | F.col('user.id').alias('user_id'), 63 | 'lang', 64 | 'quote_count', 65 | 'reply_count', 66 | 'retweet_count' 67 | ) 68 | 69 | def tweets_fact_pipeline(spark: SparkSession): 70 | # Read raw data 71 | tweets_df = read_json_data( 72 | spark, 73 | s3_path=TWEETS_DATA_PATH 74 | ) 75 | 76 | # Filter data 77 | tweet_fact_df = filter_tweets_fact_table(tweets_df) 78 | tweet_fact_df.printSchema() 79 | 80 | # Write to delta table 81 | write_delta_table( 82 | data=tweet_fact_df, 83 | table=TWEETS_FACT_TABLE_NAME, 84 | mode="overwrite", 85 | partitions=["lang"], 86 | options={ 87 | "overwriteSchema": "true" 88 | } 89 | ) 90 | 91 | if __name__ == "__main__": 92 | spark = init_spark() 93 | tweets_fact_pipeline(spark=spark) -------------------------------------------------------------------------------- /spark/README.md: -------------------------------------------------------------------------------- 1 | # Spark kubernetes 2 | ## Preparation 3 | ### Kubernetes cluster 4 | - Make sure your kubernetes cluster is available and there is an namespace for Spark. 5 | ``` 6 | kubectl create namespace spark 7 | ``` 8 | 9 | - Update Spark manifest corespond to your environment (host mount volume path, cpu and ram resources,...) 10 | 11 | - Apply the k8s resources for Spark: 12 | ``` 13 | kubectl -n spark apply -f manifests 14 | ``` 15 | 16 | 17 | ### Docker image 18 | 1. Build base spark image 19 | - From your host machine, move to SPARK_HOME project and build the base image (python binding support): 20 | ```shell 21 | cd $SPARK_HOME && ./bin/docker-image-tool.sh -r olh -t base -p ./kubernetes/dockerfiles/spark/bindings/python/Dockerfile build 22 | ``` 23 | 24 | 2. Move back to this directory and build new spark image which expands from the above base image and adding supports for delta lake. 25 | ```shell 26 | docker build -t olh/spark:delta -f Dockerfile . 
30 | ```
31 | 
32 | ## Submit spark app to kubernetes cluster
33 | - Run spark-submit with the cluster target set to k8s:
34 | ```shell
35 | spark-submit \
36 |     --deploy-mode cluster \
37 |     --master k8s://https://kubernetes.docker.internal:6443 \
38 |     --name spark-pi \
39 |     --class org.apache.spark.examples.SparkPi \
40 |     --conf spark.executor.instances=2 \
41 |     --conf spark.kubernetes.container.image=olh/spark:delta \
42 |     --conf spark.kubernetes.namespace=spark \
43 |     --conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
44 |     local:///tmp/spark-examples_2.12-3.3.1.jar
45 | ```
46 | 
47 | - Better yet, we can submit a Spark job whose application file is stored on MinIO:
48 | ```shell
49 | spark-submit \
50 |     --deploy-mode cluster \
51 |     --master k8s://[CHANGE_ME] \
52 |     --name spark-pi \
53 |     --class org.apache.spark.examples.SparkPi \
54 |     --conf spark.executor.instances=2 \
55 |     --conf spark.kubernetes.container.image=olh/spark:delta \
56 |     --conf spark.kubernetes.namespace=spark \
57 |     --conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
58 |     --conf spark.hadoop.fs.s3a.endpoint=[CHANGE_ME] \
59 |     --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider \
60 |     --conf spark.hadoop.fs.s3a.access.key=[CHANGE_ME] \
61 |     --conf spark.hadoop.fs.s3a.secret.key=[CHANGE_ME] \
62 |     --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \
63 |     --conf spark.hadoop.fs.s3a.fast.upload=true \
64 |     s3a://<bucket>/<path-to-application-jar>
65 | ```
66 | 
67 | > Note that defaults for most of these settings are already defined in the Spark image (see the spark-defaults.conf files), so you don't actually need to specify them when submitting. They are listed here in case you want to override the default config at submit time.
68 | 
69 | - Check the k8s job:
70 | ```shell
71 | (base) ➜ kubectl -n spark get pod
72 | NAME                               READY   STATUS        RESTARTS   AGE
73 | spark-pi-167abb84a59fcca8-driver   0/1     Completed     0          7s
74 | spark-pi-e1323b84a59fd7e0-exec-1   0/1     Terminating   0          4s
75 | spark-pi-e1323b84a59fd7e0-exec-2   0/1     Terminating   0          4s
76 | ```
77 | 
--------------------------------------------------------------------------------
/jupyter/Dockerfile:
--------------------------------------------------------------------------------
1 | # Copyright (c) Jupyter Development Team.
2 | # Distributed under the terms of the Modified BSD License.
3 | ARG OWNER=jupyter 4 | ARG BASE_CONTAINER=$OWNER/scipy-notebook 5 | FROM $BASE_CONTAINER 6 | 7 | LABEL maintainer="Jupyter Project " 8 | 9 | # Fix: https://github.com/hadolint/hadolint/wiki/DL4006 10 | # Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 11 | SHELL ["/bin/bash", "-o", "pipefail", "-c"] 12 | 13 | USER root 14 | 15 | # Spark dependencies 16 | # Default values can be overridden at build time 17 | # (ARGS are in lower case to distinguish them from ENV) 18 | ARG spark_version="3.3.1" 19 | ARG hadoop_version="3" 20 | ARG scala_version 21 | ARG spark_checksum="769db39a560a95fd88b58ed3e9e7d1e92fb68ee406689fb4d30c033cb5911e05c1942dcc70e5ec4585df84e80aabbc272b9386a208debda89522efff1335c8ff" 22 | ARG openjdk_version="17" 23 | 24 | ENV APACHE_SPARK_VERSION="${spark_version}" \ 25 | HADOOP_VERSION="${hadoop_version}" 26 | 27 | RUN apt-get update --yes && \ 28 | apt-get install --yes --no-install-recommends \ 29 | "openjdk-${openjdk_version}-jre-headless" \ 30 | ca-certificates-java && \ 31 | apt-get clean && rm -rf /var/lib/apt/lists/* 32 | 33 | # Spark installation 34 | WORKDIR /tmp 35 | 36 | RUN if [ -z "${scala_version}" ]; then \ 37 | wget -qO "spark.tgz" "https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz"; \ 38 | else \ 39 | wget -qO "spark.tgz" "https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${scala_version}.tgz"; \ 40 | fi && \ 41 | echo "${spark_checksum} *spark.tgz" | sha512sum -c - && \ 42 | tar xzf "spark.tgz" -C /usr/local --owner root --group root --no-same-owner && \ 43 | rm "spark.tgz" 44 | 45 | # Configure Spark 46 | ENV SPARK_HOME=/usr/local/spark 47 | ENV SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx2048M --driver-java-options=-Dlog4j.logLevel=info" \ 48 | PATH="${PATH}:${SPARK_HOME}/bin" 49 | ENV SPARK_JARS="${SPARK_HOME}/jars/" 50 | 51 | RUN if [ -z "${scala_version}" ]; then \ 52 | ln -s "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" "${SPARK_HOME}"; \ 53 | else \ 54 | ln -s "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${scala_version}" "${SPARK_HOME}"; \ 55 | fi && \ 56 | # Add a link in the before_notebook hook in order to source automatically PYTHONPATH && \ 57 | mkdir -p /usr/local/bin/before-notebook.d && \ 58 | ln -s "${SPARK_HOME}/sbin/spark-config.sh" /usr/local/bin/before-notebook.d/spark-config.sh 59 | 60 | COPY jars-downloader.sh /tmp/jars-downloader.sh 61 | RUN chmod a+x /tmp/jars-downloader.sh && /tmp/jars-downloader.sh 62 | 63 | # Add spark default config 64 | COPY spark-defaults.conf ${SPARK_HOME}/conf/spark-defaults.conf 65 | 66 | # Configure IPython system-wide 67 | COPY ipython_kernel_config.py "/etc/ipython/" 68 | RUN fix-permissions "/etc/ipython/" 69 | 70 | COPY tests /tmp/tests 71 | RUN chown -R ${NB_UID} /tmp/tests && chmod a+x /tmp/tests/test.sh 72 | 73 | USER ${NB_UID} 74 | 75 | # Install pyarrow 76 | RUN mamba install --quiet --yes \ 77 | 'pyarrow' 'pytest' 'testbook' && \ 78 | mamba clean --all -f -y && \ 79 | fix-permissions "${CONDA_DIR}" && \ 80 | fix-permissions "/home/${NB_USER}" 81 | 82 | WORKDIR "${HOME}" 83 | 84 | EXPOSE 4040 -------------------------------------------------------------------------------- /dagster/hackernews/hackernews/io/arrow_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | import pyarrow as pa 3 | import 
pyarrow.parquet as pq
4 | import pandas as pd
5 | import os
6 | from typing import Any, Sequence
7 | from pyarrow.fs import FileSystem, LocalFileSystem, S3FileSystem
8 | 
9 | from dagster import (
10 |     Field,
11 |     InputContext,
12 |     IOManager,
13 |     OutputContext,
14 |     StringSource,
15 |     io_manager,
16 | )
17 | 
18 | 
19 | class ArrowDataSetIO(IOManager):
20 |     def __init__(self,
21 |                  filesystem: FileSystem,
22 |                  prefix: str) -> None:
23 |         self.filesystem = filesystem
24 |         self.prefix = prefix
25 |         super().__init__()
26 | 
27 |     @property
28 |     def storage_options(self) -> dict:
29 |         """delta-rs style storage options for this filesystem.
30 | 
31 |         Completes the `context.resources.arrow.storage_options` lookup used in
32 |         assets.py; the keys mirror the commented-out example there and are
33 |         read from the environment.
34 |         """
35 |         return {
36 |             "AWS_ACCESS_KEY_ID": os.environ.get("AWS_ACCESS_KEY_ID", ""),
37 |             "AWS_SECRET_ACCESS_KEY": os.environ.get("AWS_SECRET_ACCESS_KEY", ""),
38 |             "AWS_ENDPOINT_URL": os.environ.get("AWS_ENDPOINT_URL", ""),
39 |             "AWS_REGION": os.environ.get("AWS_REGION", ""),
40 |             "AWS_S3_ALLOW_UNSAFE_RENAME": "true",
41 |             "AWS_STORAGE_ALLOW_HTTP": "true",
42 |         }
43 | 
44 |     @staticmethod
45 |     def _get_configured_filesystem() -> "FileSystem":
46 |         """Get the Arrow filesystem related to the running environment."""
47 |         # todo: not implemented yet; for now the filesystem is injected
48 |         # through the io_manager config (see arrow_dataset_io below)
49 |         raise NotImplementedError
50 | 
51 |     def _get_stored_path(self, context: Union[InputContext, OutputContext]) -> str:
52 |         path: Sequence[str]
53 |         if context.has_asset_key:
54 |             path = context.get_asset_identifier()
55 |         else:
56 |             path = ["storage", *context.get_identifier()]
57 | 
58 |         return "/".join([self.prefix, *path])
59 | 
60 |     def load_input(self, context: "InputContext") -> Any:
61 |         context.log.info(vars(context.upstream_output))
62 |         file_format = context.upstream_output.metadata['stored_as']
63 |         context.log.info(f"Stored format: {file_format}")
64 |         if file_format == "parquet":
65 |             data_path = self._get_stored_path(context=context)
66 |             data = pq.ParquetDataset(
67 |                 path_or_paths=data_path,
68 |                 filesystem=self.filesystem
69 |             )
70 |             return data
71 | 
72 |     def handle_output(self, context: "OutputContext", obj: Any) -> None:
73 |         context.log.info(vars(context))
74 |         if isinstance(obj, pd.DataFrame):
75 |             file_format = context.metadata.get("stored_as")
76 |             if file_format == 'parquet':
77 |                 data_path = self._get_stored_path(context=context)
78 |                 table = pa.Table.from_pandas(obj)
79 |                 pq.write_to_dataset(
80 |                     table,
81 |                     root_path=data_path,
82 |                     filesystem=self.filesystem
83 |                 )
84 |                 context.log.info(f"Path: {data_path}")
85 | 
86 | 
87 | @io_manager(
88 |     config_schema={
89 |         "filesystem": Field(StringSource),
90 |         "endpoint_url": Field(StringSource),
91 |         "prefix": Field(StringSource, is_required=False, default_value="dagster")
92 |     }
93 | )
94 | def arrow_dataset_io(init_context) -> ArrowDataSetIO:
95 |     configured_filesystem = init_context.resource_config.get("filesystem")
96 |     if configured_filesystem == "LocalFileSystem":
97 |         filesystem = LocalFileSystem()
98 |     elif configured_filesystem == "S3FileSystem":
99 |         filesystem = S3FileSystem(
100 |             endpoint_override=init_context.resource_config.get("endpoint_url")
101 |         )
102 |     else:
103 |         raise ValueError(
104 |             "The configured filesystem {} is not supported".format(configured_filesystem)
105 |         )
106 |     prefix = init_context.resource_config["prefix"]
107 |     return ArrowDataSetIO(
108 |         filesystem=filesystem,
109 |         prefix=prefix
110 |     )
--------------------------------------------------------------------------------
/dagster/hackernews/hackernews/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import Dict
3 | from dagster import Definitions, colored_console_logger, load_assets_from_modules
4 | 
5 | from dagster_aws.s3 import s3_pickle_io_manager, s3_resource
6 | from dagster_pyspark import pyspark_resource
7 | 
8 | from dagster_dbt import dbt_cli_resource
9 | 
10 | from . import assets
11 | from .resources.trino import trino_client
12 | from .io.arrow_dataset import arrow_dataset_io
13 | 
14 | 
15 | S3_ENDPOINT = os.environ.get("AWS_ENDPOINT_URL")
16 | AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
17 | AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
18 | 
19 | SPARK_CLUSTER = os.environ.get(
20 |     "SPARK_CLUSTER", "k8s://https://host.docker.internal:8443"
21 | )
22 | SPARK_METASTORE = os.environ.get(
23 |     "SPARK_METASTORE", "thrift://host.docker.internal:9083"
24 | )
25 | # Set this to a Dagster host IP that the k8s cluster's network can connect to
26 | SPARK_DRIVER_HOST = os.environ.get("SPARK_DRIVER_HOST")
27 | 
28 | # DBT_HOME must be set; the dbt project and profiles paths are resolved under it
29 | DBT_HOME = os.environ.get("DBT_HOME")
30 | 
31 | DBT_PROJECT_PATH = os.path.join(DBT_HOME, "olh/dbt/hackernews")
32 | DBT_PROFILES = os.path.join(DBT_HOME, "olh/dbt/hackernews")
33 | 
34 | 
35 | def get_default_config_k8s() -> Dict:
36 |     return {
37 |         "spark.master": SPARK_CLUSTER,
38 |         "spark.kubernetes.namespace": "spark",
39 |         "hive.metastore.uris": SPARK_METASTORE,
40 |         "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
41 |         "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
42 |         "spark.driver.host": SPARK_DRIVER_HOST,
43 |         "spark.app.name": "dagster_",
44 |         "spark.kubernetes.container.image": "olh/spark:delta",
45 |         "spark.kubernetes.authenticate.driver.serviceAccountName": "spark",
46 |         "spark.hadoop.fs.s3a.access.key": AWS_ACCESS_KEY_ID,
47 |         "spark.hadoop.fs.s3a.secret.key": AWS_SECRET_ACCESS_KEY,
48 |         "spark.hadoop.fs.s3a.endpoint": S3_ENDPOINT,
49 |         "spark.hadoop.fs.s3a.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem",
50 |         "spark.hadoop.fs.s3a.aws.credentials.provider": "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider",
51 |         "spark.hadoop.fs.s3a.fast.upload": "true",
52 |         "spark.kubernetes.file.upload.path": "s3a://lake-dev/spark",
53 |         "spark.kubernetes.driver.volumes.persistentVolumeClaim.sparkrwx.options.claimName": "sparkrwx",
54 |         "spark.kubernetes.executor.volumes.persistentVolumeClaim.sparkrwx.options.claimName": "sparkrwx",
55 |         "spark.kubernetes.driver.volumes.persistentVolumeClaim.sparkrwx.mount.path": "/opt/spark/work-dir",
56 |         "spark.kubernetes.executor.volumes.persistentVolumeClaim.sparkrwx.mount.path": "/opt/spark/work-dir",
57 |         "spark.driver.extraJavaOptions": "-Divy.cache.dir=/opt/spark/work-dir/tmp -Divy.home=/opt/spark/work-dir/tmp",
58 |         "spark.executor.instances": "1",
59 |         "spark.sql.execution.arrow.pyspark.enabled": "true",
60 |     }
61 | 
62 | 
63 | dbt_resources = {
64 |     "dbt": dbt_cli_resource.configured(
65 |         {
66 |             "project_dir": DBT_PROJECT_PATH,
67 |             "profiles_dir": DBT_PROFILES,
68 |         },
69 |     ),
70 | }
71 | 
72 | 
73 | defs = Definitions(
74 |     assets=load_assets_from_modules([assets]),
75 |     loggers={"console": colored_console_logger.configured({"log_level": "ERROR"})},
76 |     resources={
77 |         "s3_io_manager": s3_pickle_io_manager.configured(
78 |             {"s3_bucket": "lake-dev", "s3_prefix": "dagster/hackernews"}
79 |         ),
80 |         "arrow": arrow_dataset_io.configured(
81 |             {
82 |                 "filesystem": "S3FileSystem",
83 |                 "endpoint_url": S3_ENDPOINT,
84 |                 "prefix": "lake-dev/dagster/hackernews",
85 |             }
86 |         ),
87 |         "s3": s3_resource.configured(
88 |             {
89 |                 "endpoint_url": S3_ENDPOINT,
90 |             }
91 |         ),
92 |         "trino_client": trino_client.configured({"schema": "hackernews"}),
93 |         "pyspark": pyspark_resource.configured(
94 |             {"spark_conf": get_default_config_k8s()}
95 |         ),
96 |         **dbt_resources,
97 |     },
98 | )
99 | 
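100 | # A minimal usage sketch (hypothetical, not wired into the pipeline): with the
101 | # environment variables above exported, this package can be served as a Dagster
102 | # code location, e.g.:
103 | #
104 | #   dagster dev -m hackernews
105 | #
106 | # after which the assets can be materialized from the web UI.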
--------------------------------------------------------------------------------
/dagster/hackernews/hackernews/assets.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import requests
3 | from dagster import MetadataValue, Output, asset
4 | from deltalake.writer import write_deltalake
5 | 
6 | 
7 | @asset(
8 |     io_manager_key='s3_io_manager',
9 |     required_resource_keys={'s3'}
10 | )
11 | def hackernews_top_story_ids(context):
12 |     """
13 |     Get top stories from the HackerNews top stories endpoint.
14 |     API Docs: https://github.com/HackerNews/API#new-top-and-best-stories
15 |     """
16 |     top_story_ids = requests.get(
17 |         "https://hacker-news.firebaseio.com/v0/topstories.json"
18 |     ).json()
19 |     return top_story_ids[:10]
20 | 
21 | 
22 | # asset dependencies can be inferred from parameter names
23 | @asset(
24 |     key_prefix=['hackernews'],
25 |     io_manager_key='arrow',
26 |     metadata={"stored_as": "parquet"}
27 | )
28 | def hackernews_top_stories(context, hackernews_top_story_ids):
29 |     """Get items based on story ids from the HackerNews items endpoint"""
30 |     results = []
31 |     for item_id in hackernews_top_story_ids:
32 |         item = requests.get(
33 |             f"https://hacker-news.firebaseio.com/v0/item/{item_id}.json"
34 |         ).json()
35 |         results.append(item)
36 | 
37 |     df = pd.DataFrame(results)
38 | 
39 |     # recorded metadata can be customized
40 |     metadata = {
41 |         "num_records": len(df),
42 |         "preview": MetadataValue.md(df[["title", "by", "url"]].to_markdown())
43 |     }
44 | 
45 |     context.add_output_metadata(metadata)
46 | 
47 |     return Output(value=df, metadata=metadata)
48 | 
49 | 
50 | @asset(
51 |     key_prefix=['hackernews'],
52 |     required_resource_keys={'trino_client'},
53 |     io_manager_key='arrow',
54 |     config_schema={"max_result": int},
55 |     metadata={"stored_as": "parquet"}
56 | )
57 | def hackernews_items(context):
58 |     def get_last_crawled_item_id(context):
59 |         rs = context.resources.trino_client.query(
60 |             "SELECT max(id) FROM hackernews.items"
61 |         )
62 |         context.log.info(f"Last crawled item id result: {rs}")
63 |         return rs[0][0]
64 | 
65 |     arg_max_result = context.op_config.get("max_result", 10)
66 |     # get the id of the most recently crawled item
67 |     last_id = int(get_last_crawled_item_id(context))
68 | 
69 |     context.log.info(f"Last id: {last_id}")
70 |     max_id = int(requests.get(
71 |         "https://hacker-news.firebaseio.com/v0/maxitem.json"
72 |     ).json())
73 |     context.log.info(f"Max id: {max_id}")
74 |     to_crawl_ids = list(range(last_id + 1, min(max_id, last_id + arg_max_result)))
75 | 
76 |     results = []
77 |     for item_id in to_crawl_ids:
78 |         item = requests.get(
79 |             f"https://hacker-news.firebaseio.com/v0/item/{item_id}.json"
80 |         ).json()
81 |         results.append(item)
82 | 
83 |     df = pd.DataFrame(results)
84 | 
85 |     metadata = {
86 |         "num_records": len(df),
87 |         "preview": MetadataValue.md(df[["id", "by", "time"]].iloc[0:10].to_markdown())
88 |     }
89 | 
90 |     return Output(value=df, metadata=metadata)
91 | 
92 | 
93 | @asset(
94 |     required_resource_keys={'s3', 'pyspark'},
95 |     compute_kind="pyspark",
96 | )
97 | def items_spark(context, hackernews_items):
98 |     items_pdf = hackernews_items.read().to_pandas()
99 |     spark = context.resources.pyspark.spark_session
100 |     spark_df = spark.createDataFrame(items_pdf)
101 | 
102 |     (spark_df
103 |         .write
104 |         .format("delta")
105 |         .mode("overwrite")
106 |         .saveAsTable("hackernews.items"))
107 | 
108 | 
109 | @asset(
110 |     key_prefix=['hackernews'],
111 |     compute_kind="delta-rs",
112 |     # 'arrow' is required here so the delta-rs writer can reuse the IO
113 |     # manager's storage options
114 |     required_resource_keys={'trino_client', 'arrow'}
115 | )
116 | def items(context, hackernews_items):
117 |     from trino.exceptions import TrinoUserError
118 | 
119 |     # storage_options = {
120 |     #     "AWS_ACCESS_KEY_ID": "",
121 |     #     "AWS_SECRET_ACCESS_KEY": "",
122 |     #     "AWS_ENDPOINT_URL": "",
123 |     #     "AWS_REGION": "",
124 |     #     "AWS_S3_ALLOW_UNSAFE_RENAME": "true",
125 |     #     "AWS_STORAGE_ALLOW_HTTP": "true"
126 |     # }
127 |     storage_options = context.resources.arrow.storage_options
128 | 
129 |     write_deltalake(
130 |         table_or_uri="s3://lake-dev/warehouse/hackernews.db/items",
131 |         storage_options=storage_options,
132 |         data=hackernews_items.read(),
133 |         mode="overwrite",  # overwrite_schema below only applies in overwrite mode
134 |         overwrite_schema=True
135 |     )
136 | 
137 |     # Register the table in the metastore
138 |     try:
139 |         context.resources.trino_client.query(
140 |             """CALL system.register_table(
141 |                 schema_name => 'hackernews',
142 |                 table_name => 'items',
143 |                 table_location => 's3a://lake-dev/warehouse/hackernews.db/items'
144 |             )"""
145 |         )
146 |     except TrinoUserError as e:
147 |         if e.error_name == 'ALREADY_EXISTS':
148 |             context.log.info("Skipping delta table registration since the table already exists!")
149 |             return
150 |         raise
151 | 
152 | 
153 | assets = [
154 |     hackernews_top_story_ids,
155 |     hackernews_top_stories,
156 |     hackernews_items,
157 |     items_spark,
158 |     items
159 | ]
160 | 
--------------------------------------------------------------------------------
/dagster/poetry.lock:
--------------------------------------------------------------------------------
1 | [[package]]
2 | name = "agate"
3 | version = "1.7.0"
4 | description = "A data analysis library that is optimized for humans instead of machines."
5 | category = "main"
6 | optional = false
7 | python-versions = "*"
8 | 
9 | [package.dependencies]
10 | Babel = ">=2.0"
11 | isodate = ">=0.5.4"
12 | leather = ">=0.3.2"
13 | parsedatetime = ">=2.1,<2.5 || >2.5,<2.6 || >2.6"
14 | python-slugify = ">=1.2.1"
15 | pytimeparse = ">=1.1.5"
16 | 
17 | [package.extras]
18 | docs = ["Sphinx (>=1.2.2)", "sphinx-rtd-theme (>=0.1.6)"]
19 | test = ["coverage (>=3.7.1)", "cssselect (>=0.9.1)", "lxml (>=3.6.0)", "pytest", "pytest-cov", "pytz (>=2015.4)", "PyICU (>=2.4.2)"]
20 | 
21 | [[package]]
22 | name = "alembic"
23 | version = "1.9.4"
24 | description = "A database migration tool for SQLAlchemy."
25 | category = "main"
26 | optional = false
27 | python-versions = ">=3.7"
28 | 
29 | [package.dependencies]
30 | Mako = "*"
31 | SQLAlchemy = ">=1.3.0"
32 | 
33 | [package.extras]
34 | tz = ["python-dateutil"]
35 | 
36 | [[package]]
37 | name = "aniso8601"
38 | version = "9.0.1"
39 | description = "A library for parsing ISO 8601 strings."
40 | category = "main" 41 | optional = false 42 | python-versions = "*" 43 | 44 | [package.extras] 45 | dev = ["black", "coverage", "isort", "pre-commit", "pyenchant", "pylint"] 46 | 47 | [[package]] 48 | name = "anyio" 49 | version = "3.6.2" 50 | description = "High level compatibility layer for multiple asynchronous event loop implementations" 51 | category = "main" 52 | optional = false 53 | python-versions = ">=3.6.2" 54 | 55 | [package.dependencies] 56 | idna = ">=2.8" 57 | sniffio = ">=1.1" 58 | 59 | [package.extras] 60 | doc = ["packaging", "sphinx-rtd-theme", "sphinx-autodoc-typehints (>=1.2.0)"] 61 | test = ["coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "contextlib2", "uvloop (<0.15)", "mock (>=4)", "uvloop (>=0.15)"] 62 | trio = ["trio (>=0.16,<0.22)"] 63 | 64 | [[package]] 65 | name = "attrs" 66 | version = "22.2.0" 67 | description = "Classes Without Boilerplate" 68 | category = "main" 69 | optional = false 70 | python-versions = ">=3.6" 71 | 72 | [package.extras] 73 | cov = ["attrs", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"] 74 | dev = ["attrs"] 75 | docs = ["furo", "sphinx", "myst-parser", "zope.interface", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier"] 76 | tests = ["attrs", "zope.interface"] 77 | tests-no-zope = ["hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist", "cloudpickle", "mypy (>=0.971,<0.990)", "pytest-mypy-plugins"] 78 | tests_no_zope = ["hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist", "cloudpickle", "mypy (>=0.971,<0.990)", "pytest-mypy-plugins"] 79 | 80 | [[package]] 81 | name = "babel" 82 | version = "2.11.0" 83 | description = "Internationalization utilities" 84 | category = "main" 85 | optional = false 86 | python-versions = ">=3.6" 87 | 88 | [package.dependencies] 89 | pytz = ">=2015.7" 90 | 91 | [[package]] 92 | name = "backoff" 93 | version = "2.2.1" 94 | description = "Function decoration for backoff and retry" 95 | category = "main" 96 | optional = false 97 | python-versions = ">=3.7,<4.0" 98 | 99 | [[package]] 100 | name = "betterproto" 101 | version = "1.2.5" 102 | description = "A better Protobuf / gRPC generator & library" 103 | category = "main" 104 | optional = false 105 | python-versions = ">=3.6" 106 | 107 | [package.dependencies] 108 | grpclib = "*" 109 | stringcase = "*" 110 | 111 | [package.extras] 112 | compiler = ["black", "jinja2", "protobuf"] 113 | 114 | [[package]] 115 | name = "boto3" 116 | version = "1.26.79" 117 | description = "The AWS SDK for Python" 118 | category = "main" 119 | optional = false 120 | python-versions = ">= 3.7" 121 | 122 | [package.dependencies] 123 | botocore = ">=1.29.79,<1.30.0" 124 | jmespath = ">=0.7.1,<2.0.0" 125 | s3transfer = ">=0.6.0,<0.7.0" 126 | 127 | [package.extras] 128 | crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] 129 | 130 | [[package]] 131 | name = "botocore" 132 | version = "1.29.79" 133 | description = "Low-level, data-driven core of boto 3." 
134 | category = "main" 135 | optional = false 136 | python-versions = ">= 3.7" 137 | 138 | [package.dependencies] 139 | jmespath = ">=0.7.1,<2.0.0" 140 | python-dateutil = ">=2.1,<3.0.0" 141 | urllib3 = ">=1.25.4,<1.27" 142 | 143 | [package.extras] 144 | crt = ["awscrt (==0.16.9)"] 145 | 146 | [[package]] 147 | name = "cachetools" 148 | version = "5.3.0" 149 | description = "Extensible memoizing collections and decorators" 150 | category = "main" 151 | optional = false 152 | python-versions = "~=3.7" 153 | 154 | [[package]] 155 | name = "certifi" 156 | version = "2022.12.7" 157 | description = "Python package for providing Mozilla's CA Bundle." 158 | category = "main" 159 | optional = false 160 | python-versions = ">=3.6" 161 | 162 | [[package]] 163 | name = "cffi" 164 | version = "1.15.1" 165 | description = "Foreign Function Interface for Python calling C code." 166 | category = "main" 167 | optional = false 168 | python-versions = "*" 169 | 170 | [package.dependencies] 171 | pycparser = "*" 172 | 173 | [[package]] 174 | name = "charset-normalizer" 175 | version = "3.0.1" 176 | description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 177 | category = "main" 178 | optional = false 179 | python-versions = "*" 180 | 181 | [[package]] 182 | name = "click" 183 | version = "8.1.3" 184 | description = "Composable command line interface toolkit" 185 | category = "main" 186 | optional = false 187 | python-versions = ">=3.7" 188 | 189 | [package.dependencies] 190 | colorama = {version = "*", markers = "platform_system == \"Windows\""} 191 | 192 | [[package]] 193 | name = "colorama" 194 | version = "0.4.6" 195 | description = "Cross-platform colored terminal text." 196 | category = "main" 197 | optional = false 198 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" 199 | 200 | [[package]] 201 | name = "coloredlogs" 202 | version = "14.0" 203 | description = "Colored terminal output for Python's logging module" 204 | category = "main" 205 | optional = false 206 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 207 | 208 | [package.dependencies] 209 | humanfriendly = ">=7.1" 210 | 211 | [package.extras] 212 | cron = ["capturer (>=2.4)"] 213 | 214 | [[package]] 215 | name = "commonmark" 216 | version = "0.9.1" 217 | description = "Python parser for the CommonMark Markdown spec" 218 | category = "main" 219 | optional = false 220 | python-versions = "*" 221 | 222 | [package.extras] 223 | test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"] 224 | 225 | [[package]] 226 | name = "croniter" 227 | version = "1.3.8" 228 | description = "croniter provides iteration for datetime object with cron like format" 229 | category = "main" 230 | optional = false 231 | python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 232 | 233 | [package.dependencies] 234 | python-dateutil = "*" 235 | 236 | [[package]] 237 | name = "dagit" 238 | version = "1.1.19" 239 | description = "Web UI for dagster." 240 | category = "main" 241 | optional = false 242 | python-versions = "*" 243 | 244 | [package.dependencies] 245 | click = ">=7.0,<9.0" 246 | dagster = "1.1.19" 247 | dagster-graphql = "1.1.19" 248 | starlette = "*" 249 | uvicorn = {version = "*", extras = ["standard"]} 250 | 251 | [package.extras] 252 | notebook = ["nbconvert"] 253 | test = ["starlette"] 254 | 255 | [[package]] 256 | name = "dagster" 257 | version = "1.1.19" 258 | description = "The data orchestration platform built for productivity." 
259 | category = "main" 260 | optional = false 261 | python-versions = "*" 262 | 263 | [package.dependencies] 264 | alembic = ">=1.2.1,<1.6.3 || >1.6.3,<1.7.0 || >1.7.0" 265 | click = ">=5.0" 266 | coloredlogs = ">=6.1,<=14.0" 267 | croniter = ">=0.3.34" 268 | docstring-parser = "*" 269 | grpcio = ">=1.32.0,<1.48.1" 270 | grpcio-health-checking = ">=1.32.0,<1.44.0" 271 | Jinja2 = "*" 272 | packaging = ">=20.9" 273 | pendulum = "*" 274 | protobuf = ">=3.13.0,<4" 275 | psutil = {version = ">=1.0", markers = "platform_system == \"Windows\""} 276 | pydantic = "*" 277 | python-dateutil = "*" 278 | python-dotenv = "*" 279 | pytz = "*" 280 | pywin32 = {version = "!=226", markers = "platform_system == \"Windows\""} 281 | PyYAML = ">=5.1" 282 | requests = "*" 283 | sqlalchemy = ">=1.0,<2.0.0" 284 | tabulate = "*" 285 | tomli = "*" 286 | toposort = ">=1.0" 287 | tqdm = "*" 288 | typing-extensions = ">=4.0.1" 289 | universal-pathlib = "*" 290 | watchdog = ">=0.8.3" 291 | 292 | [package.extras] 293 | black = ["black[jupyter] (==22.12.0)"] 294 | docker = ["docker"] 295 | pyright = ["pyright (==1.1.293)", "pandas-stubs", "types-backports", "types-certifi", "types-chardet", "types-croniter", "types-cryptography", "types-mock", "types-paramiko", "types-pkg-resources", "types-pyopenssl", "types-python-dateutil", "types-pyyaml", "types-pytz", "types-requests", "types-simplejson", "types-six", "types-sqlalchemy", "types-tabulate", "types-tzlocal", "types-toml"] 296 | ruff = ["ruff (==0.0.241)"] 297 | test = ["docker", "grpcio-tools (>=1.32.0,<1.44.0)", "mock (==3.0.5)", "objgraph", "pytest-cov (==2.10.1)", "pytest-dependency (==0.5.1)", "pytest-mock (==3.3.1)", "pytest-rerunfailures (==10.0)", "pytest-runner (==5.2)", "pytest-xdist (==2.1.0)", "pytest (==7.0.1)", "responses", "snapshottest (==0.6.0)", "tox (==3.25.0)", "yamllint", "buildkite-test-collector"] 298 | 299 | [[package]] 300 | name = "dagster-aws" 301 | version = "0.17.19" 302 | description = "Package for AWS-specific Dagster framework solid and resource components." 303 | category = "main" 304 | optional = false 305 | python-versions = "*" 306 | 307 | [package.dependencies] 308 | boto3 = "*" 309 | dagster = "1.1.19" 310 | packaging = "*" 311 | requests = "*" 312 | 313 | [package.extras] 314 | pyspark = ["dagster-pyspark"] 315 | redshift = ["psycopg2-binary"] 316 | test = ["moto (>=2.2.8)", "requests-mock", "xmltodict (==0.12.0)"] 317 | 318 | [[package]] 319 | name = "dagster-dbt" 320 | version = "0.17.19" 321 | description = "A Dagster integration for dbt" 322 | category = "main" 323 | optional = false 324 | python-versions = "*" 325 | 326 | [package.dependencies] 327 | dagster = "1.1.19" 328 | dbt-core = "*" 329 | requests = "*" 330 | typer = {version = "*", extras = ["all"]} 331 | 332 | [package.extras] 333 | test = ["jinja2", "dbt-rpc (<0.3.0)", "dbt-postgres"] 334 | 335 | [[package]] 336 | name = "dagster-graphql" 337 | version = "1.1.19" 338 | description = "The GraphQL frontend to python dagster." 
339 | category = "main" 340 | optional = false 341 | python-versions = "*" 342 | 343 | [package.dependencies] 344 | dagster = "1.1.19" 345 | gql = {version = ">=3.0.0", extras = ["requests"]} 346 | graphene = ">=3" 347 | requests = "*" 348 | starlette = "*" 349 | 350 | [[package]] 351 | name = "dagster-k8s" 352 | version = "0.17.19" 353 | description = "A Dagster integration for k8s" 354 | category = "main" 355 | optional = false 356 | python-versions = "*" 357 | 358 | [package.dependencies] 359 | dagster = "1.1.19" 360 | kubernetes = "*" 361 | 362 | [[package]] 363 | name = "dagster-postgres" 364 | version = "0.17.19" 365 | description = "A Dagster integration for postgres" 366 | category = "main" 367 | optional = false 368 | python-versions = "*" 369 | 370 | [package.dependencies] 371 | dagster = "1.1.19" 372 | psycopg2-binary = "*" 373 | 374 | [[package]] 375 | name = "dagster-pyspark" 376 | version = "0.17.19" 377 | description = "Package for PySpark Dagster framework components." 378 | category = "main" 379 | optional = false 380 | python-versions = "*" 381 | 382 | [package.dependencies] 383 | dagster = "1.1.19" 384 | dagster-spark = "0.17.19" 385 | pyspark = {version = ">=3.0.0", markers = "python_version >= \"3.8\""} 386 | 387 | [[package]] 388 | name = "dagster-spark" 389 | version = "0.17.19" 390 | description = "Package for Spark Dagster framework components." 391 | category = "main" 392 | optional = false 393 | python-versions = "*" 394 | 395 | [package.dependencies] 396 | dagster = "1.1.19" 397 | 398 | [[package]] 399 | name = "dbt-core" 400 | version = "1.4.3" 401 | description = "With dbt, data analysts and engineers can build analytics the way engineers build applications." 402 | category = "main" 403 | optional = false 404 | python-versions = ">=3.7.2" 405 | 406 | [package.dependencies] 407 | agate = ">=1.6,<1.7.1" 408 | betterproto = "1.2.5" 409 | cffi = ">=1.9,<2.0.0" 410 | click = ">=7.0,<9" 411 | colorama = ">=0.3.9,<0.4.7" 412 | dbt-extractor = ">=0.4.1,<0.5.0" 413 | hologram = ">=0.0.14,<=0.0.15" 414 | idna = ">=2.5,<4" 415 | isodate = ">=0.6,<0.7" 416 | Jinja2 = "3.1.2" 417 | logbook = ">=1.5,<1.6" 418 | mashumaro = {version = "3.3.1", extras = ["msgpack"]} 419 | minimal-snowplow-tracker = "0.0.2" 420 | networkx = {version = ">=2.3,<3", markers = "python_version >= \"3.8\""} 421 | packaging = ">20.9" 422 | pathspec = ">=0.9,<0.11" 423 | pyyaml = ">=6.0" 424 | requests = "<3.0.0" 425 | sqlparse = ">=0.2.3,<0.5" 426 | typing-extensions = ">=3.7.4" 427 | werkzeug = ">=1,<3" 428 | 429 | [[package]] 430 | name = "dbt-extractor" 431 | version = "0.4.1" 432 | description = "A tool to analyze and extract information from Jinja used in dbt projects." 
433 | category = "main" 434 | optional = false 435 | python-versions = ">=3.6.1" 436 | 437 | [[package]] 438 | name = "dbt-trino" 439 | version = "1.4.0" 440 | description = "The trino adapter plugin for dbt (data build tool)" 441 | category = "main" 442 | optional = false 443 | python-versions = ">=3.7" 444 | 445 | [package.dependencies] 446 | dbt-core = ">=1.4.0,<1.5.0" 447 | trino = "0.321.0" 448 | 449 | [[package]] 450 | name = "deltalake" 451 | version = "0.7.0" 452 | description = "Native Delta Lake Python binding based on delta-rs with Pandas integration" 453 | category = "main" 454 | optional = false 455 | python-versions = ">=3.7" 456 | 457 | [package.dependencies] 458 | pyarrow = ">=7" 459 | 460 | [package.extras] 461 | devel = ["mypy", "black", "isort", "packaging (>=20)", "pytest", "pytest-mock", "pytest-cov", "pytest-timeout", "sphinx (<=4.5)", "sphinx-rtd-theme", "toml", "wheel", "pytest-benchmark"] 462 | pyspark = ["pyspark", "delta-spark", "numpy (==1.22.2)"] 463 | pandas = ["pandas"] 464 | 465 | [[package]] 466 | name = "docstring-parser" 467 | version = "0.15" 468 | description = "Parse Python docstrings in reST, Google and Numpydoc format" 469 | category = "main" 470 | optional = false 471 | python-versions = ">=3.6,<4.0" 472 | 473 | [[package]] 474 | name = "fsspec" 475 | version = "2023.1.0" 476 | description = "File-system specification" 477 | category = "main" 478 | optional = false 479 | python-versions = ">=3.7" 480 | 481 | [package.extras] 482 | abfs = ["adlfs"] 483 | adl = ["adlfs"] 484 | arrow = ["pyarrow (>=1)"] 485 | dask = ["dask", "distributed"] 486 | dropbox = ["dropboxdrivefs", "requests", "dropbox"] 487 | entrypoints = ["importlib-metadata"] 488 | fuse = ["fusepy"] 489 | gcs = ["gcsfs"] 490 | git = ["pygit2"] 491 | github = ["requests"] 492 | gs = ["gcsfs"] 493 | gui = ["panel"] 494 | hdfs = ["pyarrow (>=1)"] 495 | http = ["requests", "aiohttp (!=4.0.0a0,!=4.0.0a1)"] 496 | libarchive = ["libarchive-c"] 497 | oci = ["ocifs"] 498 | s3 = ["s3fs"] 499 | sftp = ["paramiko"] 500 | smb = ["smbprotocol"] 501 | ssh = ["paramiko"] 502 | tqdm = ["tqdm"] 503 | 504 | [[package]] 505 | name = "future" 506 | version = "0.18.3" 507 | description = "Clean single-source support for Python 3 and 2" 508 | category = "main" 509 | optional = false 510 | python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" 511 | 512 | [[package]] 513 | name = "google-auth" 514 | version = "2.16.1" 515 | description = "Google Authentication Library" 516 | category = "main" 517 | optional = false 518 | python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*" 519 | 520 | [package.dependencies] 521 | cachetools = ">=2.0.0,<6.0" 522 | pyasn1-modules = ">=0.2.1" 523 | rsa = {version = ">=3.1.4,<5", markers = "python_version >= \"3.6\""} 524 | six = ">=1.9.0" 525 | 526 | [package.extras] 527 | aiohttp = ["requests (>=2.20.0,<3.0.0dev)", "aiohttp (>=3.6.2,<4.0.0dev)"] 528 | enterprise_cert = ["cryptography (==36.0.2)", "pyopenssl (==22.0.0)"] 529 | pyopenssl = ["pyopenssl (>=20.0.0)", "cryptography (>=38.0.3)"] 530 | reauth = ["pyu2f (>=0.1.5)"] 531 | requests = ["requests (>=2.20.0,<3.0.0dev)"] 532 | 533 | [[package]] 534 | name = "gql" 535 | version = "3.4.0" 536 | description = "GraphQL client for Python" 537 | category = "main" 538 | optional = false 539 | python-versions = "*" 540 | 541 | [package.dependencies] 542 | backoff = ">=1.11.1,<3.0" 543 | graphql-core = ">=3.2,<3.3" 544 | requests = {version = ">=2.26,<3", optional = true, markers = "extra == \"requests\""} 545 | 
requests-toolbelt = {version = ">=0.9.1,<1", optional = true, markers = "extra == \"requests\""} 546 | urllib3 = {version = ">=1.26", optional = true, markers = "extra == \"requests\""} 547 | yarl = ">=1.6,<2.0" 548 | 549 | [package.extras] 550 | aiohttp = ["aiohttp (>=3.7.1,<3.9.0)"] 551 | all = ["aiohttp (>=3.7.1,<3.9.0)", "requests (>=2.26,<3)", "requests-toolbelt (>=0.9.1,<1)", "urllib3 (>=1.26)", "botocore (>=1.21,<2)", "websockets (>=9,<10)", "websockets (>=10,<11)"] 552 | botocore = ["botocore (>=1.21,<2)"] 553 | dev = ["aiohttp (>=3.7.1,<3.9.0)", "requests (>=2.26,<3)", "requests-toolbelt (>=0.9.1,<1)", "urllib3 (>=1.26)", "botocore (>=1.21,<2)", "black (==22.3.0)", "check-manifest (>=0.42,<1)", "flake8 (==3.8.1)", "isort (==4.3.21)", "mypy (==0.910)", "sphinx (>=3.0.0,<4)", "sphinx-rtd-theme (>=0.4,<1)", "sphinx-argparse (==0.2.5)", "types-aiofiles", "types-mock", "types-requests", "parse (==1.15.0)", "pytest (==6.2.5)", "pytest-asyncio (==0.16.0)", "pytest-console-scripts (==1.3.1)", "pytest-cov (==3.0.0)", "mock (==4.0.2)", "vcrpy (==4.0.2)", "aiofiles", "websockets (>=9,<10)", "websockets (>=10,<11)"] 554 | requests = ["requests (>=2.26,<3)", "requests-toolbelt (>=0.9.1,<1)", "urllib3 (>=1.26)"] 555 | test = ["aiohttp (>=3.7.1,<3.9.0)", "requests (>=2.26,<3)", "requests-toolbelt (>=0.9.1,<1)", "urllib3 (>=1.26)", "botocore (>=1.21,<2)", "parse (==1.15.0)", "pytest (==6.2.5)", "pytest-asyncio (==0.16.0)", "pytest-console-scripts (==1.3.1)", "pytest-cov (==3.0.0)", "mock (==4.0.2)", "vcrpy (==4.0.2)", "aiofiles", "websockets (>=9,<10)", "websockets (>=10,<11)"] 556 | test_no_transport = ["parse (==1.15.0)", "pytest (==6.2.5)", "pytest-asyncio (==0.16.0)", "pytest-console-scripts (==1.3.1)", "pytest-cov (==3.0.0)", "mock (==4.0.2)", "vcrpy (==4.0.2)", "aiofiles"] 557 | websockets = ["websockets (>=9,<10)", "websockets (>=10,<11)"] 558 | 559 | [[package]] 560 | name = "graphene" 561 | version = "3.2.1" 562 | description = "GraphQL Framework for Python" 563 | category = "main" 564 | optional = false 565 | python-versions = "*" 566 | 567 | [package.dependencies] 568 | aniso8601 = ">=8,<10" 569 | graphql-core = ">=3.1,<3.3" 570 | graphql-relay = ">=3.1,<3.3" 571 | 572 | [package.extras] 573 | dev = ["black (==22.3.0)", "flake8 (>=4,<5)", "pytest (>=6,<7)", "pytest-benchmark (>=3.4,<4)", "pytest-cov (>=3,<4)", "pytest-mock (>=3,<4)", "pytest-asyncio (>=0.16,<2)", "snapshottest (>=0.6,<1)", "coveralls (>=3.3,<4)", "mock (>=4,<5)", "pytz (==2022.1)", "iso8601 (>=1,<2)"] 574 | test = ["pytest (>=6,<7)", "pytest-benchmark (>=3.4,<4)", "pytest-cov (>=3,<4)", "pytest-mock (>=3,<4)", "pytest-asyncio (>=0.16,<2)", "snapshottest (>=0.6,<1)", "coveralls (>=3.3,<4)", "mock (>=4,<5)", "pytz (==2022.1)", "iso8601 (>=1,<2)"] 575 | 576 | [[package]] 577 | name = "graphql-core" 578 | version = "3.2.3" 579 | description = "GraphQL implementation for Python, a port of GraphQL.js, the JavaScript reference implementation for GraphQL." 
580 | category = "main" 581 | optional = false 582 | python-versions = ">=3.6,<4" 583 | 584 | [[package]] 585 | name = "graphql-relay" 586 | version = "3.2.0" 587 | description = "Relay library for graphql-core" 588 | category = "main" 589 | optional = false 590 | python-versions = ">=3.6,<4" 591 | 592 | [package.dependencies] 593 | graphql-core = ">=3.2,<3.3" 594 | 595 | [[package]] 596 | name = "greenlet" 597 | version = "2.0.2" 598 | description = "Lightweight in-process concurrent programming" 599 | category = "main" 600 | optional = false 601 | python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" 602 | 603 | [package.extras] 604 | docs = ["sphinx", "docutils (<0.18)"] 605 | test = ["objgraph", "psutil"] 606 | 607 | [[package]] 608 | name = "grpcio" 609 | version = "1.48.0" 610 | description = "HTTP/2-based RPC framework" 611 | category = "main" 612 | optional = false 613 | python-versions = ">=3.6" 614 | 615 | [package.dependencies] 616 | six = ">=1.5.2" 617 | 618 | [package.extras] 619 | protobuf = ["grpcio-tools (>=1.48.0)"] 620 | 621 | [[package]] 622 | name = "grpcio-health-checking" 623 | version = "1.43.0" 624 | description = "Standard Health Checking Service for gRPC" 625 | category = "main" 626 | optional = false 627 | python-versions = ">=3.6" 628 | 629 | [package.dependencies] 630 | grpcio = ">=1.43.0" 631 | protobuf = ">=3.6.0" 632 | 633 | [[package]] 634 | name = "grpclib" 635 | version = "0.4.3" 636 | description = "Pure-Python gRPC implementation for asyncio" 637 | category = "main" 638 | optional = false 639 | python-versions = ">=3.7" 640 | 641 | [package.dependencies] 642 | h2 = ">=3.1.0,<5" 643 | multidict = "*" 644 | 645 | [package.extras] 646 | protobuf = ["protobuf (>=3.15.0)"] 647 | 648 | [[package]] 649 | name = "h11" 650 | version = "0.14.0" 651 | description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" 652 | category = "main" 653 | optional = false 654 | python-versions = ">=3.7" 655 | 656 | [[package]] 657 | name = "h2" 658 | version = "4.1.0" 659 | description = "HTTP/2 State-Machine based protocol implementation" 660 | category = "main" 661 | optional = false 662 | python-versions = ">=3.6.1" 663 | 664 | [package.dependencies] 665 | hpack = ">=4.0,<5" 666 | hyperframe = ">=6.0,<7" 667 | 668 | [[package]] 669 | name = "hologram" 670 | version = "0.0.15" 671 | description = "JSON schema generation from dataclasses" 672 | category = "main" 673 | optional = false 674 | python-versions = "*" 675 | 676 | [package.dependencies] 677 | jsonschema = ">=3.0,<4.0" 678 | python-dateutil = ">=2.8,<2.9" 679 | 680 | [[package]] 681 | name = "hpack" 682 | version = "4.0.0" 683 | description = "Pure-Python HPACK header compression" 684 | category = "main" 685 | optional = false 686 | python-versions = ">=3.6.1" 687 | 688 | [[package]] 689 | name = "httptools" 690 | version = "0.5.0" 691 | description = "A collection of framework independent HTTP protocol utils." 
692 | category = "main" 693 | optional = false 694 | python-versions = ">=3.5.0" 695 | 696 | [package.extras] 697 | test = ["Cython (>=0.29.24,<0.30.0)"] 698 | 699 | [[package]] 700 | name = "humanfriendly" 701 | version = "10.0" 702 | description = "Human friendly output for text interfaces using Python" 703 | category = "main" 704 | optional = false 705 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 706 | 707 | [package.dependencies] 708 | pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_version >= \"3.8\""} 709 | 710 | [[package]] 711 | name = "hyperframe" 712 | version = "6.0.1" 713 | description = "HTTP/2 framing layer for Python" 714 | category = "main" 715 | optional = false 716 | python-versions = ">=3.6.1" 717 | 718 | [[package]] 719 | name = "idna" 720 | version = "3.4" 721 | description = "Internationalized Domain Names in Applications (IDNA)" 722 | category = "main" 723 | optional = false 724 | python-versions = ">=3.5" 725 | 726 | [[package]] 727 | name = "isodate" 728 | version = "0.6.1" 729 | description = "An ISO 8601 date/time/duration parser and formatter" 730 | category = "main" 731 | optional = false 732 | python-versions = "*" 733 | 734 | [package.dependencies] 735 | six = "*" 736 | 737 | [[package]] 738 | name = "jinja2" 739 | version = "3.1.2" 740 | description = "A very fast and expressive template engine." 741 | category = "main" 742 | optional = false 743 | python-versions = ">=3.7" 744 | 745 | [package.dependencies] 746 | MarkupSafe = ">=2.0" 747 | 748 | [package.extras] 749 | i18n = ["Babel (>=2.7)"] 750 | 751 | [[package]] 752 | name = "jmespath" 753 | version = "1.0.1" 754 | description = "JSON Matching Expressions" 755 | category = "main" 756 | optional = false 757 | python-versions = ">=3.7" 758 | 759 | [[package]] 760 | name = "jsonschema" 761 | version = "3.2.0" 762 | description = "An implementation of JSON Schema validation for Python" 763 | category = "main" 764 | optional = false 765 | python-versions = "*" 766 | 767 | [package.dependencies] 768 | attrs = ">=17.4.0" 769 | pyrsistent = ">=0.14.0" 770 | six = ">=1.11.0" 771 | 772 | [package.extras] 773 | format = ["idna", "jsonpointer (>1.13)", "rfc3987", "strict-rfc3339", "webcolors"] 774 | format_nongpl = ["idna", "jsonpointer (>1.13)", "webcolors", "rfc3986-validator (>0.1.0)", "rfc3339-validator"] 775 | 776 | [[package]] 777 | name = "kubernetes" 778 | version = "26.1.0" 779 | description = "Kubernetes python client" 780 | category = "main" 781 | optional = false 782 | python-versions = ">=3.6" 783 | 784 | [package.dependencies] 785 | certifi = ">=14.05.14" 786 | google-auth = ">=1.0.1" 787 | python-dateutil = ">=2.5.3" 788 | pyyaml = ">=5.4.1" 789 | requests = "*" 790 | requests-oauthlib = "*" 791 | six = ">=1.9.0" 792 | urllib3 = ">=1.24.2" 793 | websocket-client = ">=0.32.0,<0.40.0 || >0.40.0,<0.41.0 || >=0.43.0" 794 | 795 | [package.extras] 796 | adal = ["adal (>=1.0.2)"] 797 | 798 | [[package]] 799 | name = "leather" 800 | version = "0.3.4" 801 | description = "Python charting for 80% of humans." 
802 | category = "main" 803 | optional = false 804 | python-versions = "*" 805 | 806 | [package.dependencies] 807 | six = ">=1.6.1" 808 | 809 | [[package]] 810 | name = "logbook" 811 | version = "1.5.3" 812 | description = "A logging replacement for Python" 813 | category = "main" 814 | optional = false 815 | python-versions = "*" 816 | 817 | [package.extras] 818 | all = ["redis", "brotli", "pytest (>4.0)", "execnet (>=1.0.9)", "cython", "pyzmq", "pytest-cov (>=2.6)", "sqlalchemy", "jinja2"] 819 | compression = ["brotli"] 820 | dev = ["pytest-cov (>=2.6)", "pytest (>4.0)", "cython"] 821 | execnet = ["execnet (>=1.0.9)"] 822 | jinja = ["jinja2"] 823 | redis = ["redis"] 824 | sqlalchemy = ["sqlalchemy"] 825 | test = ["pytest-cov (>=2.6)", "pytest (>4.0)"] 826 | zmq = ["pyzmq"] 827 | 828 | [[package]] 829 | name = "mako" 830 | version = "1.2.4" 831 | description = "A super-fast templating language that borrows the best ideas from the existing templating languages." 832 | category = "main" 833 | optional = false 834 | python-versions = ">=3.7" 835 | 836 | [package.dependencies] 837 | MarkupSafe = ">=0.9.2" 838 | 839 | [package.extras] 840 | babel = ["babel"] 841 | lingua = ["lingua"] 842 | testing = ["pytest"] 843 | 844 | [[package]] 845 | name = "markupsafe" 846 | version = "2.1.2" 847 | description = "Safely add untrusted strings to HTML/XML markup." 848 | category = "main" 849 | optional = false 850 | python-versions = ">=3.7" 851 | 852 | [[package]] 853 | name = "mashumaro" 854 | version = "3.3.1" 855 | description = "Fast serialization framework on top of dataclasses" 856 | category = "main" 857 | optional = false 858 | python-versions = ">=3.7" 859 | 860 | [package.dependencies] 861 | msgpack = {version = ">=0.5.6", optional = true, markers = "extra == \"msgpack\""} 862 | typing-extensions = ">=4.1.0" 863 | 864 | [package.extras] 865 | msgpack = ["msgpack (>=0.5.6)"] 866 | orjson = ["orjson"] 867 | toml = ["tomli-w (>=1.0)", "tomli (>=1.1.0)"] 868 | yaml = ["pyyaml (>=3.13)"] 869 | 870 | [[package]] 871 | name = "minimal-snowplow-tracker" 872 | version = "0.0.2" 873 | description = "A minimal snowplow event tracker for Python. 
Add analytics to your Python and Django apps, webapps and games" 874 | category = "main" 875 | optional = false 876 | python-versions = "*" 877 | 878 | [package.dependencies] 879 | requests = ">=2.2.1,<3.0" 880 | six = ">=1.9.0,<2.0" 881 | 882 | [[package]] 883 | name = "msgpack" 884 | version = "1.0.4" 885 | description = "MessagePack serializer" 886 | category = "main" 887 | optional = false 888 | python-versions = "*" 889 | 890 | [[package]] 891 | name = "multidict" 892 | version = "6.0.4" 893 | description = "multidict implementation" 894 | category = "main" 895 | optional = false 896 | python-versions = ">=3.7" 897 | 898 | [[package]] 899 | name = "networkx" 900 | version = "2.8.8" 901 | description = "Python package for creating and manipulating graphs and networks" 902 | category = "main" 903 | optional = false 904 | python-versions = ">=3.8" 905 | 906 | [package.extras] 907 | default = ["numpy (>=1.19)", "scipy (>=1.8)", "matplotlib (>=3.4)", "pandas (>=1.3)"] 908 | developer = ["pre-commit (>=2.20)", "mypy (>=0.982)"] 909 | doc = ["sphinx (>=5.2)", "pydata-sphinx-theme (>=0.11)", "sphinx-gallery (>=0.11)", "numpydoc (>=1.5)", "pillow (>=9.2)", "nb2plots (>=0.6)", "texext (>=0.6.6)"] 910 | extra = ["lxml (>=4.6)", "pygraphviz (>=1.9)", "pydot (>=1.4.2)", "sympy (>=1.10)"] 911 | test = ["pytest (>=7.2)", "pytest-cov (>=4.0)", "codecov (>=2.1)"] 912 | 913 | [[package]] 914 | name = "numpy" 915 | version = "1.24.2" 916 | description = "Fundamental package for array computing in Python" 917 | category = "main" 918 | optional = false 919 | python-versions = ">=3.8" 920 | 921 | [[package]] 922 | name = "oauthlib" 923 | version = "3.2.2" 924 | description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" 925 | category = "main" 926 | optional = false 927 | python-versions = ">=3.6" 928 | 929 | [package.extras] 930 | rsa = ["cryptography (>=3.0.0)"] 931 | signals = ["blinker (>=1.4.0)"] 932 | signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] 933 | 934 | [[package]] 935 | name = "packaging" 936 | version = "23.0" 937 | description = "Core utilities for Python packages" 938 | category = "main" 939 | optional = false 940 | python-versions = ">=3.7" 941 | 942 | [[package]] 943 | name = "pandas" 944 | version = "1.5.3" 945 | description = "Powerful data structures for data analysis, time series, and statistics" 946 | category = "main" 947 | optional = false 948 | python-versions = ">=3.8" 949 | 950 | [package.dependencies] 951 | numpy = [ 952 | {version = ">=1.20.3", markers = "python_version < \"3.10\""}, 953 | {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, 954 | {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, 955 | ] 956 | python-dateutil = ">=2.8.1" 957 | pytz = ">=2020.1" 958 | 959 | [package.extras] 960 | test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] 961 | 962 | [[package]] 963 | name = "parsedatetime" 964 | version = "2.4" 965 | description = "Parse human-readable date/time text." 966 | category = "main" 967 | optional = false 968 | python-versions = "*" 969 | 970 | [package.dependencies] 971 | future = "*" 972 | 973 | [[package]] 974 | name = "pathspec" 975 | version = "0.10.3" 976 | description = "Utility library for gitignore style pattern matching of file paths." 
977 | category = "main" 978 | optional = false 979 | python-versions = ">=3.7" 980 | 981 | [[package]] 982 | name = "pendulum" 983 | version = "2.1.2" 984 | description = "Python datetimes made easy" 985 | category = "main" 986 | optional = false 987 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 988 | 989 | [package.dependencies] 990 | python-dateutil = ">=2.6,<3.0" 991 | pytzdata = ">=2020.1" 992 | 993 | [[package]] 994 | name = "protobuf" 995 | version = "3.20.3" 996 | description = "Protocol Buffers" 997 | category = "main" 998 | optional = false 999 | python-versions = ">=3.7" 1000 | 1001 | [[package]] 1002 | name = "psutil" 1003 | version = "5.9.4" 1004 | description = "Cross-platform lib for process and system monitoring in Python." 1005 | category = "main" 1006 | optional = false 1007 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 1008 | 1009 | [package.extras] 1010 | test = ["ipaddress", "mock", "enum34", "pywin32", "wmi"] 1011 | 1012 | [[package]] 1013 | name = "psycopg2-binary" 1014 | version = "2.9.5" 1015 | description = "psycopg2 - Python-PostgreSQL Database Adapter" 1016 | category = "main" 1017 | optional = false 1018 | python-versions = ">=3.6" 1019 | 1020 | [[package]] 1021 | name = "py4j" 1022 | version = "0.10.9.5" 1023 | description = "Enables Python programs to dynamically access arbitrary Java objects" 1024 | category = "main" 1025 | optional = false 1026 | python-versions = "*" 1027 | 1028 | [[package]] 1029 | name = "pyarrow" 1030 | version = "10.0.1" 1031 | description = "Python library for Apache Arrow" 1032 | category = "main" 1033 | optional = false 1034 | python-versions = ">=3.7" 1035 | 1036 | [package.dependencies] 1037 | numpy = ">=1.16.6" 1038 | 1039 | [[package]] 1040 | name = "pyasn1" 1041 | version = "0.4.8" 1042 | description = "ASN.1 types and codecs" 1043 | category = "main" 1044 | optional = false 1045 | python-versions = "*" 1046 | 1047 | [[package]] 1048 | name = "pyasn1-modules" 1049 | version = "0.2.8" 1050 | description = "A collection of ASN.1-based protocols modules." 1051 | category = "main" 1052 | optional = false 1053 | python-versions = "*" 1054 | 1055 | [package.dependencies] 1056 | pyasn1 = ">=0.4.6,<0.5.0" 1057 | 1058 | [[package]] 1059 | name = "pycparser" 1060 | version = "2.21" 1061 | description = "C parser in Python" 1062 | category = "main" 1063 | optional = false 1064 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 1065 | 1066 | [[package]] 1067 | name = "pydantic" 1068 | version = "1.10.5" 1069 | description = "Data validation and settings management using python type hints" 1070 | category = "main" 1071 | optional = false 1072 | python-versions = ">=3.7" 1073 | 1074 | [package.dependencies] 1075 | typing-extensions = ">=4.2.0" 1076 | 1077 | [package.extras] 1078 | dotenv = ["python-dotenv (>=0.10.4)"] 1079 | email = ["email-validator (>=1.0.3)"] 1080 | 1081 | [[package]] 1082 | name = "pygments" 1083 | version = "2.14.0" 1084 | description = "Pygments is a syntax highlighting package written in Python." 1085 | category = "main" 1086 | optional = false 1087 | python-versions = ">=3.6" 1088 | 1089 | [package.extras] 1090 | plugins = ["importlib-metadata"] 1091 | 1092 | [[package]] 1093 | name = "pyreadline3" 1094 | version = "3.4.1" 1095 | description = "A python implementation of GNU readline." 
1096 | category = "main" 1097 | optional = false 1098 | python-versions = "*" 1099 | 1100 | [[package]] 1101 | name = "pyrsistent" 1102 | version = "0.19.3" 1103 | description = "Persistent/Functional/Immutable data structures" 1104 | category = "main" 1105 | optional = false 1106 | python-versions = ">=3.7" 1107 | 1108 | [[package]] 1109 | name = "pyspark" 1110 | version = "3.3.2" 1111 | description = "Apache Spark Python API" 1112 | category = "main" 1113 | optional = false 1114 | python-versions = ">=3.7" 1115 | 1116 | [package.dependencies] 1117 | py4j = "0.10.9.5" 1118 | 1119 | [package.extras] 1120 | ml = ["numpy (>=1.15)"] 1121 | mllib = ["numpy (>=1.15)"] 1122 | pandas_on_spark = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=1.0.0)"] 1123 | sql = ["pandas (>=1.0.5)", "pyarrow (>=1.0.0)"] 1124 | 1125 | [[package]] 1126 | name = "python-dateutil" 1127 | version = "2.8.2" 1128 | description = "Extensions to the standard Python datetime module" 1129 | category = "main" 1130 | optional = false 1131 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" 1132 | 1133 | [package.dependencies] 1134 | six = ">=1.5" 1135 | 1136 | [[package]] 1137 | name = "python-dotenv" 1138 | version = "1.0.0" 1139 | description = "Read key-value pairs from a .env file and set them as environment variables" 1140 | category = "main" 1141 | optional = false 1142 | python-versions = ">=3.8" 1143 | 1144 | [package.extras] 1145 | cli = ["click (>=5.0)"] 1146 | 1147 | [[package]] 1148 | name = "python-slugify" 1149 | version = "8.0.1" 1150 | description = "A Python slugify application that also handles Unicode" 1151 | category = "main" 1152 | optional = false 1153 | python-versions = ">=3.7" 1154 | 1155 | [package.dependencies] 1156 | text-unidecode = ">=1.3" 1157 | 1158 | [package.extras] 1159 | unidecode = ["Unidecode (>=1.1.1)"] 1160 | 1161 | [[package]] 1162 | name = "pytimeparse" 1163 | version = "1.1.8" 1164 | description = "Time expression parser" 1165 | category = "main" 1166 | optional = false 1167 | python-versions = "*" 1168 | 1169 | [[package]] 1170 | name = "pytz" 1171 | version = "2022.7.1" 1172 | description = "World timezone definitions, modern and historical" 1173 | category = "main" 1174 | optional = false 1175 | python-versions = "*" 1176 | 1177 | [[package]] 1178 | name = "pytz-deprecation-shim" 1179 | version = "0.1.0.post0" 1180 | description = "Shims to make deprecation of pytz easier" 1181 | category = "main" 1182 | optional = false 1183 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" 1184 | 1185 | [package.dependencies] 1186 | tzdata = {version = "*", markers = "python_version >= \"3.6\""} 1187 | 1188 | [[package]] 1189 | name = "pytzdata" 1190 | version = "2020.1" 1191 | description = "The Olson timezone database for Python." 1192 | category = "main" 1193 | optional = false 1194 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 1195 | 1196 | [[package]] 1197 | name = "pywin32" 1198 | version = "305" 1199 | description = "Python for Window Extensions" 1200 | category = "main" 1201 | optional = false 1202 | python-versions = "*" 1203 | 1204 | [[package]] 1205 | name = "pyyaml" 1206 | version = "6.0" 1207 | description = "YAML parser and emitter for Python" 1208 | category = "main" 1209 | optional = false 1210 | python-versions = ">=3.6" 1211 | 1212 | [[package]] 1213 | name = "requests" 1214 | version = "2.28.2" 1215 | description = "Python HTTP for Humans." 
1216 | category = "main" 1217 | optional = false 1218 | python-versions = ">=3.7, <4" 1219 | 1220 | [package.dependencies] 1221 | certifi = ">=2017.4.17" 1222 | charset-normalizer = ">=2,<4" 1223 | idna = ">=2.5,<4" 1224 | urllib3 = ">=1.21.1,<1.27" 1225 | 1226 | [package.extras] 1227 | socks = ["PySocks (>=1.5.6,!=1.5.7)"] 1228 | use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] 1229 | 1230 | [[package]] 1231 | name = "requests-oauthlib" 1232 | version = "1.3.1" 1233 | description = "OAuthlib authentication support for Requests." 1234 | category = "main" 1235 | optional = false 1236 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 1237 | 1238 | [package.dependencies] 1239 | oauthlib = ">=3.0.0" 1240 | requests = ">=2.0.0" 1241 | 1242 | [package.extras] 1243 | rsa = ["oauthlib[signedtoken] (>=3.0.0)"] 1244 | 1245 | [[package]] 1246 | name = "requests-toolbelt" 1247 | version = "0.10.1" 1248 | description = "A utility belt for advanced users of python-requests" 1249 | category = "main" 1250 | optional = false 1251 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 1252 | 1253 | [package.dependencies] 1254 | requests = ">=2.0.1,<3.0.0" 1255 | 1256 | [[package]] 1257 | name = "rich" 1258 | version = "12.6.0" 1259 | description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" 1260 | category = "main" 1261 | optional = false 1262 | python-versions = ">=3.6.3,<4.0.0" 1263 | 1264 | [package.dependencies] 1265 | commonmark = ">=0.9.0,<0.10.0" 1266 | pygments = ">=2.6.0,<3.0.0" 1267 | 1268 | [package.extras] 1269 | jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"] 1270 | 1271 | [[package]] 1272 | name = "rsa" 1273 | version = "4.9" 1274 | description = "Pure-Python RSA implementation" 1275 | category = "main" 1276 | optional = false 1277 | python-versions = ">=3.6,<4" 1278 | 1279 | [package.dependencies] 1280 | pyasn1 = ">=0.1.3" 1281 | 1282 | [[package]] 1283 | name = "s3transfer" 1284 | version = "0.6.0" 1285 | description = "An Amazon S3 Transfer Manager" 1286 | category = "main" 1287 | optional = false 1288 | python-versions = ">= 3.7" 1289 | 1290 | [package.dependencies] 1291 | botocore = ">=1.12.36,<2.0a.0" 1292 | 1293 | [package.extras] 1294 | crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] 1295 | 1296 | [[package]] 1297 | name = "shellingham" 1298 | version = "1.5.1" 1299 | description = "Tool to Detect Surrounding Shell" 1300 | category = "main" 1301 | optional = false 1302 | python-versions = ">=3.7" 1303 | 1304 | [[package]] 1305 | name = "six" 1306 | version = "1.16.0" 1307 | description = "Python 2 and 3 compatibility utilities" 1308 | category = "main" 1309 | optional = false 1310 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" 1311 | 1312 | [[package]] 1313 | name = "sniffio" 1314 | version = "1.3.0" 1315 | description = "Sniff out which async library your code is running under" 1316 | category = "main" 1317 | optional = false 1318 | python-versions = ">=3.7" 1319 | 1320 | [[package]] 1321 | name = "sqlalchemy" 1322 | version = "1.4.46" 1323 | description = "Database Abstraction Library" 1324 | category = "main" 1325 | optional = false 1326 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" 1327 | 1328 | [package.dependencies] 1329 | greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine 
== \"win32\" or platform_machine == \"WIN32\")"} 1330 | 1331 | [package.extras] 1332 | aiomysql = ["greenlet (!=0.4.17)", "aiomysql"] 1333 | aiosqlite = ["typing_extensions (!=3.10.0.1)", "greenlet (!=0.4.17)", "aiosqlite"] 1334 | asyncio = ["greenlet (!=0.4.17)"] 1335 | asyncmy = ["greenlet (!=0.4.17)", "asyncmy (>=0.2.3,!=0.2.4)"] 1336 | mariadb_connector = ["mariadb (>=1.0.1,!=1.1.2)"] 1337 | mssql = ["pyodbc"] 1338 | mssql_pymssql = ["pymssql"] 1339 | mssql_pyodbc = ["pyodbc"] 1340 | mypy = ["sqlalchemy2-stubs", "mypy (>=0.910)"] 1341 | mysql = ["mysqlclient (>=1.4.0,<2)", "mysqlclient (>=1.4.0)"] 1342 | mysql_connector = ["mysql-connector-python"] 1343 | oracle = ["cx_oracle (>=7,<8)", "cx_oracle (>=7)"] 1344 | postgresql = ["psycopg2 (>=2.7)"] 1345 | postgresql_asyncpg = ["greenlet (!=0.4.17)", "asyncpg"] 1346 | postgresql_pg8000 = ["pg8000 (>=1.16.6,!=1.29.0)"] 1347 | postgresql_psycopg2binary = ["psycopg2-binary"] 1348 | postgresql_psycopg2cffi = ["psycopg2cffi"] 1349 | pymysql = ["pymysql (<1)", "pymysql"] 1350 | sqlcipher = ["sqlcipher3-binary"] 1351 | 1352 | [[package]] 1353 | name = "sqlparse" 1354 | version = "0.4.3" 1355 | description = "A non-validating SQL parser." 1356 | category = "main" 1357 | optional = false 1358 | python-versions = ">=3.5" 1359 | 1360 | [[package]] 1361 | name = "starlette" 1362 | version = "0.25.0" 1363 | description = "The little ASGI library that shines." 1364 | category = "main" 1365 | optional = false 1366 | python-versions = ">=3.7" 1367 | 1368 | [package.dependencies] 1369 | anyio = ">=3.4.0,<5" 1370 | typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\""} 1371 | 1372 | [package.extras] 1373 | full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"] 1374 | 1375 | [[package]] 1376 | name = "stringcase" 1377 | version = "1.2.0" 1378 | description = "String case converter." 1379 | category = "main" 1380 | optional = false 1381 | python-versions = "*" 1382 | 1383 | [[package]] 1384 | name = "tabulate" 1385 | version = "0.9.0" 1386 | description = "Pretty-print tabular data" 1387 | category = "main" 1388 | optional = false 1389 | python-versions = ">=3.7" 1390 | 1391 | [package.extras] 1392 | widechars = ["wcwidth"] 1393 | 1394 | [[package]] 1395 | name = "text-unidecode" 1396 | version = "1.3" 1397 | description = "The most basic Text::Unidecode port" 1398 | category = "main" 1399 | optional = false 1400 | python-versions = "*" 1401 | 1402 | [[package]] 1403 | name = "tomli" 1404 | version = "2.0.1" 1405 | description = "A lil' TOML parser" 1406 | category = "main" 1407 | optional = false 1408 | python-versions = ">=3.7" 1409 | 1410 | [[package]] 1411 | name = "toposort" 1412 | version = "1.9" 1413 | description = "Implements a topological sort algorithm." 
1414 | category = "main" 1415 | optional = false 1416 | python-versions = "*" 1417 | 1418 | [[package]] 1419 | name = "tqdm" 1420 | version = "4.64.1" 1421 | description = "Fast, Extensible Progress Meter" 1422 | category = "main" 1423 | optional = false 1424 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" 1425 | 1426 | [package.dependencies] 1427 | colorama = {version = "*", markers = "platform_system == \"Windows\""} 1428 | 1429 | [package.extras] 1430 | dev = ["py-make (>=0.1.0)", "twine", "wheel"] 1431 | notebook = ["ipywidgets (>=6)"] 1432 | slack = ["slack-sdk"] 1433 | telegram = ["requests"] 1434 | 1435 | [[package]] 1436 | name = "trino" 1437 | version = "0.321.0" 1438 | description = "Client for the Trino distributed SQL Engine" 1439 | category = "main" 1440 | optional = false 1441 | python-versions = ">=3.7" 1442 | 1443 | [package.dependencies] 1444 | pytz = "*" 1445 | requests = "*" 1446 | tzlocal = "*" 1447 | 1448 | [package.extras] 1449 | all = ["requests-kerberos", "sqlalchemy (>=1.3)"] 1450 | external-authentication-token-cache = ["keyring"] 1451 | kerberos = ["requests-kerberos"] 1452 | sqlalchemy = ["sqlalchemy (>=1.3)"] 1453 | tests = ["requests-kerberos", "sqlalchemy (>=1.3)", "httpretty (<1.1)", "pytest", "pytest-runner", "click", "pre-commit", "black", "isort"] 1454 | 1455 | [[package]] 1456 | name = "typer" 1457 | version = "0.7.0" 1458 | description = "Typer, build great CLIs. Easy to code. Based on Python type hints." 1459 | category = "main" 1460 | optional = false 1461 | python-versions = ">=3.6" 1462 | 1463 | [package.dependencies] 1464 | click = ">=7.1.1,<9.0.0" 1465 | colorama = {version = ">=0.4.3,<0.5.0", optional = true, markers = "extra == \"all\""} 1466 | rich = {version = ">=10.11.0,<13.0.0", optional = true, markers = "extra == \"all\""} 1467 | shellingham = {version = ">=1.3.0,<2.0.0", optional = true, markers = "extra == \"all\""} 1468 | 1469 | [package.extras] 1470 | all = ["colorama (>=0.4.3,<0.5.0)", "shellingham (>=1.3.0,<2.0.0)", "rich (>=10.11.0,<13.0.0)"] 1471 | dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)", "pre-commit (>=2.17.0,<3.0.0)"] 1472 | doc = ["mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "mdx-include (>=1.4.1,<2.0.0)", "pillow (>=9.3.0,<10.0.0)", "cairosvg (>=2.5.2,<3.0.0)"] 1473 | test = ["shellingham (>=1.3.0,<2.0.0)", "pytest (>=4.4.0,<8.0.0)", "pytest-cov (>=2.10.0,<5.0.0)", "coverage (>=6.2,<7.0)", "pytest-xdist (>=1.32.0,<4.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "mypy (==0.910)", "black (>=22.3.0,<23.0.0)", "isort (>=5.0.6,<6.0.0)", "rich (>=10.11.0,<13.0.0)"] 1474 | 1475 | [[package]] 1476 | name = "typing-extensions" 1477 | version = "4.5.0" 1478 | description = "Backported and Experimental Type Hints for Python 3.7+" 1479 | category = "main" 1480 | optional = false 1481 | python-versions = ">=3.7" 1482 | 1483 | [[package]] 1484 | name = "tzdata" 1485 | version = "2022.7" 1486 | description = "Provider of IANA time zone data" 1487 | category = "main" 1488 | optional = false 1489 | python-versions = ">=2" 1490 | 1491 | [[package]] 1492 | name = "tzlocal" 1493 | version = "4.2" 1494 | description = "tzinfo object for the local timezone" 1495 | category = "main" 1496 | optional = false 1497 | python-versions = ">=3.6" 1498 | 1499 | [package.dependencies] 1500 | pytz-deprecation-shim = "*" 1501 | tzdata = {version = "*", markers = "platform_system == \"Windows\""} 1502 | 1503 | [package.extras] 1504 | devenv = ["black", "pyroma", "pytest-cov", "zest.releaser"] 1505 | test = 
["pytest-mock (>=3.3)", "pytest (>=4.3)"] 1506 | 1507 | [[package]] 1508 | name = "universal-pathlib" 1509 | version = "0.0.21" 1510 | description = "Pathlib API extended to use fsspec backends" 1511 | category = "main" 1512 | optional = false 1513 | python-versions = ">=3.7" 1514 | 1515 | [package.dependencies] 1516 | fsspec = "*" 1517 | 1518 | [package.extras] 1519 | test = ["aiohttp", "adlfs", "flake8", "gcsfs", "hadoop-test-cluster", "ipython", "jupyter", "moto", "pyarrow", "pylint", "pytest", "requests", "s3fs", "webdav4"] 1520 | 1521 | [[package]] 1522 | name = "urllib3" 1523 | version = "1.26.14" 1524 | description = "HTTP library with thread-safe connection pooling, file post, and more." 1525 | category = "main" 1526 | optional = false 1527 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" 1528 | 1529 | [package.extras] 1530 | brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"] 1531 | secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "urllib3-secure-extra", "ipaddress"] 1532 | socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] 1533 | 1534 | [[package]] 1535 | name = "uvicorn" 1536 | version = "0.20.0" 1537 | description = "The lightning-fast ASGI server." 1538 | category = "main" 1539 | optional = false 1540 | python-versions = ">=3.7" 1541 | 1542 | [package.dependencies] 1543 | click = ">=7.0" 1544 | colorama = {version = ">=0.4", optional = true, markers = "sys_platform == \"win32\" and extra == \"standard\""} 1545 | h11 = ">=0.8" 1546 | httptools = {version = ">=0.5.0", optional = true, markers = "extra == \"standard\""} 1547 | python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} 1548 | pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} 1549 | uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"standard\""} 1550 | watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} 1551 | websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} 1552 | 1553 | [package.extras] 1554 | standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] 1555 | 1556 | [[package]] 1557 | name = "uvloop" 1558 | version = "0.17.0" 1559 | description = "Fast implementation of asyncio event loop on top of libuv" 1560 | category = "main" 1561 | optional = false 1562 | python-versions = ">=3.7" 1563 | 1564 | [package.extras] 1565 | dev = ["Cython (>=0.29.32,<0.30.0)", "pytest (>=3.6.0)", "Sphinx (>=4.1.2,<4.2.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "flake8 (>=3.9.2,<3.10.0)", "psutil", "pycodestyle (>=2.7.0,<2.8.0)", "pyOpenSSL (>=22.0.0,<22.1.0)", "mypy (>=0.800)", "aiohttp"] 1566 | docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)"] 1567 | test = ["flake8 (>=3.9.2,<3.10.0)", "psutil", "pycodestyle (>=2.7.0,<2.8.0)", "pyOpenSSL (>=22.0.0,<22.1.0)", "mypy (>=0.800)", "Cython (>=0.29.32,<0.30.0)", "aiohttp"] 1568 | 1569 | [[package]] 1570 | name = "watchdog" 1571 | version = "2.3.0" 1572 | description = "Filesystem events monitoring" 1573 | category = "main" 1574 | optional = false 1575 | python-versions = ">=3.6" 1576 
| 1577 | [package.extras] 1578 | watchmedo = ["PyYAML (>=3.10)"] 1579 | 1580 | [[package]] 1581 | name = "watchfiles" 1582 | version = "0.18.1" 1583 | description = "Simple, modern and high performance file watching and code reload in python." 1584 | category = "main" 1585 | optional = false 1586 | python-versions = ">=3.7" 1587 | 1588 | [package.dependencies] 1589 | anyio = ">=3.0.0" 1590 | 1591 | [[package]] 1592 | name = "websocket-client" 1593 | version = "1.5.1" 1594 | description = "WebSocket client for Python with low level API options" 1595 | category = "main" 1596 | optional = false 1597 | python-versions = ">=3.7" 1598 | 1599 | [package.extras] 1600 | docs = ["Sphinx (>=3.4)", "sphinx-rtd-theme (>=0.5)"] 1601 | optional = ["python-socks", "wsaccel"] 1602 | test = ["websockets"] 1603 | 1604 | [[package]] 1605 | name = "websockets" 1606 | version = "10.4" 1607 | description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" 1608 | category = "main" 1609 | optional = false 1610 | python-versions = ">=3.7" 1611 | 1612 | [[package]] 1613 | name = "werkzeug" 1614 | version = "2.2.3" 1615 | description = "The comprehensive WSGI web application library." 1616 | category = "main" 1617 | optional = false 1618 | python-versions = ">=3.7" 1619 | 1620 | [package.dependencies] 1621 | MarkupSafe = ">=2.1.1" 1622 | 1623 | [package.extras] 1624 | watchdog = ["watchdog"] 1625 | 1626 | [[package]] 1627 | name = "yarl" 1628 | version = "1.8.2" 1629 | description = "Yet another URL library" 1630 | category = "main" 1631 | optional = false 1632 | python-versions = ">=3.7" 1633 | 1634 | [package.dependencies] 1635 | idna = ">=2.0" 1636 | multidict = ">=4.0" 1637 | 1638 | [metadata] 1639 | lock-version = "1.1" 1640 | python-versions = "^3.9" 1641 | content-hash = "5d9589b62bc75424c2f5006b30a18d52397def60868f07fa4d4546d79a9fce97" 1642 | 1643 | [metadata.files] 1644 | agate = [] 1645 | alembic = [] 1646 | aniso8601 = [] 1647 | anyio = [] 1648 | attrs = [] 1649 | babel = [] 1650 | backoff = [] 1651 | betterproto = [] 1652 | boto3 = [] 1653 | botocore = [] 1654 | cachetools = [] 1655 | certifi = [] 1656 | cffi = [] 1657 | charset-normalizer = [] 1658 | click = [] 1659 | colorama = [] 1660 | coloredlogs = [] 1661 | commonmark = [] 1662 | croniter = [] 1663 | dagit = [] 1664 | dagster = [] 1665 | dagster-aws = [] 1666 | dagster-dbt = [] 1667 | dagster-graphql = [] 1668 | dagster-k8s = [] 1669 | dagster-postgres = [] 1670 | dagster-pyspark = [] 1671 | dagster-spark = [] 1672 | dbt-core = [] 1673 | dbt-extractor = [] 1674 | dbt-trino = [] 1675 | deltalake = [] 1676 | docstring-parser = [] 1677 | fsspec = [] 1678 | future = [] 1679 | google-auth = [] 1680 | gql = [] 1681 | graphene = [] 1682 | graphql-core = [] 1683 | graphql-relay = [] 1684 | greenlet = [] 1685 | grpcio = [] 1686 | grpcio-health-checking = [] 1687 | grpclib = [] 1688 | h11 = [] 1689 | h2 = [] 1690 | hologram = [] 1691 | hpack = [] 1692 | httptools = [] 1693 | humanfriendly = [] 1694 | hyperframe = [] 1695 | idna = [] 1696 | isodate = [] 1697 | jinja2 = [] 1698 | jmespath = [] 1699 | jsonschema = [] 1700 | kubernetes = [] 1701 | leather = [] 1702 | logbook = [] 1703 | mako = [] 1704 | markupsafe = [] 1705 | mashumaro = [] 1706 | minimal-snowplow-tracker = [] 1707 | msgpack = [] 1708 | multidict = [] 1709 | networkx = [] 1710 | numpy = [] 1711 | oauthlib = [] 1712 | packaging = [] 1713 | pandas = [] 1714 | parsedatetime = [] 1715 | pathspec = [] 1716 | pendulum = [] 1717 | protobuf = [] 1718 | psutil = [] 1719 | psycopg2-binary = 
[] 1720 | py4j = [] 1721 | pyarrow = [] 1722 | pyasn1 = [] 1723 | pyasn1-modules = [] 1724 | pycparser = [] 1725 | pydantic = [] 1726 | pygments = [] 1727 | pyreadline3 = [] 1728 | pyrsistent = [] 1729 | pyspark = [] 1730 | python-dateutil = [] 1731 | python-dotenv = [] 1732 | python-slugify = [] 1733 | pytimeparse = [] 1734 | pytz = [] 1735 | pytz-deprecation-shim = [] 1736 | pytzdata = [] 1737 | pywin32 = [] 1738 | pyyaml = [] 1739 | requests = [] 1740 | requests-oauthlib = [] 1741 | requests-toolbelt = [] 1742 | rich = [] 1743 | rsa = [] 1744 | s3transfer = [] 1745 | shellingham = [] 1746 | six = [] 1747 | sniffio = [] 1748 | sqlalchemy = [] 1749 | sqlparse = [] 1750 | starlette = [] 1751 | stringcase = [] 1752 | tabulate = [] 1753 | text-unidecode = [] 1754 | tomli = [] 1755 | toposort = [] 1756 | tqdm = [] 1757 | trino = [] 1758 | typer = [] 1759 | typing-extensions = [] 1760 | tzdata = [] 1761 | tzlocal = [] 1762 | universal-pathlib = [] 1763 | urllib3 = [] 1764 | uvicorn = [] 1765 | uvloop = [] 1766 | watchdog = [] 1767 | watchfiles = [] 1768 | websocket-client = [] 1769 | websockets = [] 1770 | werkzeug = [] 1771 | yarl = [] 1772 | -------------------------------------------------------------------------------- /resources/practices/tweetschampions/tests/example.json: -------------------------------------------------------------------------------- 1 | {"created_at":"Sat May 26 13:18:30 +0000 2018","id":1000365563376488448,"id_str":"1000365563376488448","text":"MATCH-DAY\n\nReal Madrid vs Liverpool\n\n#UCLFinal #LFC Free Live Stream HD Here: https:\/\/t.co\/PHAepWsA6o https:\/\/t.co\/T6mWNz14lb","display_text_range":[0,101],"source":"\u003ca href=\"http:\/\/www.hootsuite.com\" rel=\"nofollow\"\u003eHootsuite\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":2846595478,"id_str":"2846595478","name":"Real Madrid vs Liverpool Live stream","screen_name":"UCL_TV","location":"USA","url":"http:\/\/www.astream.me","description":"Champions League Live Stream: Find All news And Watch Football Live Stream: #UCL , #EPL , #Bundesliga, #Ligue1 #Liga Free Live Stream Here: astream.eu","translator_type":"none","protected":false,"verified":false,"followers_count":15294,"friends_count":14768,"listed_count":60,"favourites_count":3281,"statuses_count":24148,"created_at":"Mon Oct 27 11:38:12 +0000 
2014","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":"fr","contributors_enabled":false,"is_translator":false,"profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"0084B4","profile_sidebar_border_color":"FFFFFF","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/898570202887184385\/OJkJCbKW_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/898570202887184385\/OJkJCbKW_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/2846595478\/1503071541","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"UCLFinal","indices":[37,46]},{"text":"LFC","indices":[47,51]}],"urls":[{"url":"https:\/\/t.co\/PHAepWsA6o","expanded_url":"http:\/\/bit.ly\/2s5W3Pc","display_url":"bit.ly\/2s5W3Pc","indices":[78,101]}],"user_mentions":[],"symbols":[],"media":[{"id":1000365561514258432,"id_str":"1000365561514258432","indices":[102,125],"media_url":"http:\/\/pbs.twimg.com\/media\/DeIDLZOXcAAhern.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/DeIDLZOXcAAhern.jpg","url":"https:\/\/t.co\/T6mWNz14lb","display_url":"pic.twitter.com\/T6mWNz14lb","expanded_url":"https:\/\/twitter.com\/UCL_TV\/status\/1000365563376488448\/photo\/1","type":"photo","sizes":{"small":{"w":680,"h":453,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":960,"h":640,"resize":"fit"},"medium":{"w":960,"h":640,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":1000365561514258432,"id_str":"1000365561514258432","indices":[102,125],"media_url":"http:\/\/pbs.twimg.com\/media\/DeIDLZOXcAAhern.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/DeIDLZOXcAAhern.jpg","url":"https:\/\/t.co\/T6mWNz14lb","display_url":"pic.twitter.com\/T6mWNz14lb","expanded_url":"https:\/\/twitter.com\/UCL_TV\/status\/1000365563376488448\/photo\/1","type":"photo","sizes":{"small":{"w":680,"h":453,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":960,"h":640,"resize":"fit"},"medium":{"w":960,"h":640,"resize":"fit"}}}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1527340710859"} 2 | {"created_at":"Sat May 26 13:18:31 +0000 2018","id":1000365564190048257,"id_str":"1000365564190048257","text":"RT @panditfootball: [VIDEO] Siapa yang punya peluang paling besar jadi juara Liga Champions 2018? 
#UCLfinal #SportOne \n\nhttps:\/\/t.co\/rVhdDC\u2026","source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":2917613580,"id_str":"2917613580","name":"Individu Merdeka \u262e","screen_name":"Raditakia","location":"Brenksek City","url":null,"description":"jaga kecepatan,jangan ngebutt!!!","translator_type":"none","protected":false,"verified":false,"followers_count":530,"friends_count":753,"listed_count":5,"favourites_count":5608,"statuses_count":41354,"created_at":"Wed Dec 03 13:58:11 +0000 2014","utc_offset":null,"time_zone":null,"geo_enabled":true,"lang":"id","contributors_enabled":false,"is_translator":false,"profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/995105861709086721\/8dzhwQn__normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/995105861709086721\/8dzhwQn__normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/2917613580\/1523299804","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"Sat May 26 13:16:44 +0000 2018","id":1000365116427153410,"id_str":"1000365116427153410","text":"[VIDEO] Siapa yang punya peluang paling besar jadi juara Liga Champions 2018? 
#UCLfinal #SportOne \n\nhttps:\/\/t.co\/rVhdDC7pVg","source":"\u003ca href=\"https:\/\/about.twitter.com\/products\/tweetdeck\" rel=\"nofollow\"\u003eTweetDeck\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":947281567,"id_str":"947281567","name":"PanditFootball.com","screen_name":"panditfootball","location":"Indonesia","url":"http:\/\/panditfootball.com","description":"Berita, cerita, sejarah, taktik, dan analisis sepakbola | redaksi@panditfootball.com | Kirim tulisan http:\/\/bit.ly\/sharingpandit | http:\/\/fb.com\/panditfootball","translator_type":"regular","protected":false,"verified":true,"followers_count":413628,"friends_count":216,"listed_count":535,"favourites_count":1130,"statuses_count":108045,"created_at":"Wed Nov 14 07:45:11 +0000 2012","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"000000","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme14\/bg.gif","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme14\/bg.gif","profile_background_tile":false,"profile_link_color":"E81C4F","profile_sidebar_border_color":"000000","profile_sidebar_fill_color":"000000","profile_text_color":"000000","profile_use_background_image":false,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/945669335057448960\/C2v3sMAW_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/945669335057448960\/C2v3sMAW_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/947281567\/1504079338","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":1,"favorite_count":1,"entities":{"hashtags":[{"text":"UCLfinal","indices":[78,87]},{"text":"SportOne","indices":[88,97]}],"urls":[{"url":"https:\/\/t.co\/rVhdDC7pVg","expanded_url":"https:\/\/www.youtube.com\/watch?v=_qTJEKOV02g","display_url":"youtube.com\/watch?v=_qTJEK\u2026","indices":[100,123]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"in"},"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"UCLfinal","indices":[98,107]},{"text":"SportOne","indices":[108,117]}],"urls":[],"user_mentions":[{"screen_name":"panditfootball","name":"PanditFootball.com","id":947281567,"id_str":"947281567","indices":[3,18]}],"symbols":[]},"favorited":false,"retweeted":false,"filter_level":"low","lang":"in","timestamp_ms":"1527340711053"} 3 | {"created_at":"Sat May 26 13:18:31 +0000 2018","id":1000365564320247808,"id_str":"1000365564320247808","text":"RT @madridismoreaI_: HA LLEGADO EL D\u00cdA \u26bd\ufe0f\n\nHACERLO REAL @realmadrid \ud83d\ude4f\ud83c\udffb\n\n@Nissan_ESP #InnovateYourGame #UCLFinal https:\/\/t.co\/CbOafTlbac","source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for 
Android\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":946054253461852160,"id_str":"946054253461852160","name":"Roberto Esper\u00f3n alonso","screen_name":"esperon_alonso","location":"Galicia, Espa\u00f1a","url":null,"description":"Madridista celeste y granate de coraz\u00f3n objetivo cr\u00edtico y amante al deporte en general","translator_type":"none","protected":false,"verified":false,"followers_count":234,"friends_count":996,"listed_count":0,"favourites_count":26179,"statuses_count":18715,"created_at":"Wed Dec 27 16:24:45 +0000 2017","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":"es","contributors_enabled":false,"is_translator":false,"profile_background_color":"F5F8FA","profile_background_image_url":"","profile_background_image_url_https":"","profile_background_tile":false,"profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/984431161345703937\/r3YM30Mz_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/984431161345703937\/r3YM30Mz_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/946054253461852160\/1518612861","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"Sat May 26 11:28:58 +0000 2018","id":1000337998561636352,"id_str":"1000337998561636352","text":"HA LLEGADO EL D\u00cdA \u26bd\ufe0f\n\nHACERLO REAL @realmadrid \ud83d\ude4f\ud83c\udffb\n\n@Nissan_ESP #InnovateYourGame #UCLFinal https:\/\/t.co\/CbOafTlbac","display_text_range":[0,90],"source":"\u003ca href=\"http:\/\/twitter.com\/download\/iphone\" rel=\"nofollow\"\u003eTwitter for iPhone\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":188481426,"id_str":"188481426","name":"Madridismo Real\u2122","screen_name":"madridismoreaI_","location":null,"url":"http:\/\/www.instagram.com\/madridismoreai","description":"Si eres Madridista, \u00a1\u00a1S\u00edgueme!! Toda la informaci\u00f3n del club, fotos, videos, noticias, comentarios.. \u00a1HALA MADRID! 
Contacto: madridismoreai@hotmail.com","translator_type":"none","protected":false,"verified":false,"followers_count":222874,"friends_count":165,"listed_count":916,"favourites_count":59,"statuses_count":19620,"created_at":"Wed Sep 08 20:56:05 +0000 2010","utc_offset":null,"time_zone":null,"geo_enabled":true,"lang":"es","contributors_enabled":false,"is_translator":false,"profile_background_color":"9AE4E8","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":true,"profile_link_color":"DF0101","profile_sidebar_border_color":"BDDCAD","profile_sidebar_fill_color":"DDFFCC","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/762274141991043072\/3rX0INSP_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/762274141991043072\/3rX0INSP_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/188481426\/1470575295","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"quote_count":0,"reply_count":2,"retweet_count":33,"favorite_count":24,"entities":{"hashtags":[{"text":"InnovateYourGame","indices":[63,80]},{"text":"UCLFinal","indices":[81,90]}],"urls":[],"user_mentions":[{"screen_name":"realmadrid","name":"Real Madrid C.F. \u26bd\ufe0f","id":14872237,"id_str":"14872237","indices":[35,46]},{"screen_name":"Nissan_ESP","name":"Nissan Espa\u00f1a","id":291745825,"id_str":"291745825","indices":[51,62]}],"symbols":[],"media":[{"id":1000337991989170176,"id_str":"1000337991989170176","indices":[91,114],"media_url":"http:\/\/pbs.twimg.com\/media\/DeHqGovXkAAgCx0.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/DeHqGovXkAAgCx0.jpg","url":"https:\/\/t.co\/CbOafTlbac","display_url":"pic.twitter.com\/CbOafTlbac","expanded_url":"https:\/\/twitter.com\/madridismoreaI_\/status\/1000337998561636352\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":1024,"h":683,"resize":"fit"},"medium":{"w":1024,"h":683,"resize":"fit"},"small":{"w":680,"h":454,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":1000337991989170176,"id_str":"1000337991989170176","indices":[91,114],"media_url":"http:\/\/pbs.twimg.com\/media\/DeHqGovXkAAgCx0.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/DeHqGovXkAAgCx0.jpg","url":"https:\/\/t.co\/CbOafTlbac","display_url":"pic.twitter.com\/CbOafTlbac","expanded_url":"https:\/\/twitter.com\/madridismoreaI_\/status\/1000337998561636352\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":1024,"h":683,"resize":"fit"},"medium":{"w":1024,"h":683,"resize":"fit"},"small":{"w":680,"h":454,"resize":"fit"}}}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"es"},"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"InnovateYourGame","indices":[84,101]},{"text":"UCLFinal","indices":[102,111]}],"urls":[],"user_mentions":[{"screen_name":"madridismoreaI_","name":"Madridismo Real\u2122","id":188481426,"id_str":"188481426","indices":[3,19]},{"screen_name":"realmadrid","name":"Real Madrid C.F. 
\u26bd\ufe0f","id":14872237,"id_str":"14872237","indices":[56,67]},{"screen_name":"Nissan_ESP","name":"Nissan Espa\u00f1a","id":291745825,"id_str":"291745825","indices":[72,83]}],"symbols":[],"media":[{"id":1000337991989170176,"id_str":"1000337991989170176","indices":[112,135],"media_url":"http:\/\/pbs.twimg.com\/media\/DeHqGovXkAAgCx0.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/DeHqGovXkAAgCx0.jpg","url":"https:\/\/t.co\/CbOafTlbac","display_url":"pic.twitter.com\/CbOafTlbac","expanded_url":"https:\/\/twitter.com\/madridismoreaI_\/status\/1000337998561636352\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":1024,"h":683,"resize":"fit"},"medium":{"w":1024,"h":683,"resize":"fit"},"small":{"w":680,"h":454,"resize":"fit"}},"source_status_id":1000337998561636352,"source_status_id_str":"1000337998561636352","source_user_id":188481426,"source_user_id_str":"188481426"}]},"extended_entities":{"media":[{"id":1000337991989170176,"id_str":"1000337991989170176","indices":[112,135],"media_url":"http:\/\/pbs.twimg.com\/media\/DeHqGovXkAAgCx0.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/DeHqGovXkAAgCx0.jpg","url":"https:\/\/t.co\/CbOafTlbac","display_url":"pic.twitter.com\/CbOafTlbac","expanded_url":"https:\/\/twitter.com\/madridismoreaI_\/status\/1000337998561636352\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":1024,"h":683,"resize":"fit"},"medium":{"w":1024,"h":683,"resize":"fit"},"small":{"w":680,"h":454,"resize":"fit"}},"source_status_id":1000337998561636352,"source_status_id_str":"1000337998561636352","source_user_id":188481426,"source_user_id_str":"188481426"}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"es","timestamp_ms":"1527340711084"} 4 | {"created_at":"Sat May 26 13:18:31 +0000 2018","id":1000365564357832705,"id_str":"1000365564357832705","text":"RT @madridismoreaI_: Mi XI para ma\u00f1ana: Navas, Carvajal, Nacho, Ramos, Marcelo, Modric, Kroos, Casemiro, Isco, Bale y Cristiano.\n\n\u00bfCu\u00e1l po\u2026","source":"\u003ca href=\"http:\/\/twitter.com\/\" rel=\"nofollow\"\u003eMadridista2\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":962840767,"id_str":"962840767","name":"R.Madrid\u2122","screen_name":"RMadridIsco","location":"Madrid","url":null,"description":"''No s\u00e9 si lo entend\u00e9is, llev\u00e1is en vuestras camisetas el escudo del Real Madrid'' | @isco_alarcon | Contacto \u2709 MD","translator_type":"none","protected":false,"verified":false,"followers_count":90905,"friends_count":167,"listed_count":199,"favourites_count":1192,"statuses_count":2619,"created_at":"Wed Nov 21 18:09:54 +0000 
2012","utc_offset":null,"time_zone":null,"geo_enabled":true,"lang":"es","contributors_enabled":false,"is_translator":false,"profile_background_color":"3B94D9","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":true,"profile_link_color":"1B95E0","profile_sidebar_border_color":"FFFFFF","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/999762826032922625\/bZc1Aq6p_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/999762826032922625\/bZc1Aq6p_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/962840767\/1524417608","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"Fri May 25 15:04:21 +0000 2018","id":1000029812684148737,"id_str":"1000029812684148737","text":"Mi XI para ma\u00f1ana: Navas, Carvajal, Nacho, Ramos, Marcelo, Modric, Kroos, Casemiro, Isco, Bale y Cristiano.\n\n\u00bfCu\u00e1l\u2026 https:\/\/t.co\/IsXK9nmVsx","source":"\u003ca href=\"http:\/\/twitter.com\/download\/iphone\" rel=\"nofollow\"\u003eTwitter for iPhone\u003c\/a\u003e","truncated":true,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":188481426,"id_str":"188481426","name":"Madridismo Real\u2122","screen_name":"madridismoreaI_","location":null,"url":"http:\/\/www.instagram.com\/madridismoreai","description":"Si eres Madridista, \u00a1\u00a1S\u00edgueme!! Toda la informaci\u00f3n del club, fotos, videos, noticias, comentarios.. \u00a1HALA MADRID! 
Contacto: madridismoreai@hotmail.com","translator_type":"none","protected":false,"verified":false,"followers_count":222873,"friends_count":165,"listed_count":917,"favourites_count":59,"statuses_count":19621,"created_at":"Wed Sep 08 20:56:05 +0000 2010","utc_offset":null,"time_zone":null,"geo_enabled":true,"lang":"es","contributors_enabled":false,"is_translator":false,"profile_background_color":"9AE4E8","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":true,"profile_link_color":"DF0101","profile_sidebar_border_color":"BDDCAD","profile_sidebar_fill_color":"DDFFCC","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/762274141991043072\/3rX0INSP_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/762274141991043072\/3rX0INSP_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/188481426\/1470575295","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"extended_tweet":{"full_text":"Mi XI para ma\u00f1ana: Navas, Carvajal, Nacho, Ramos, Marcelo, Modric, Kroos, Casemiro, Isco, Bale y Cristiano.\n\n\u00bfCu\u00e1l podr\u00edais vosotros?\n\n@Nissan_ESP #InnovateYourGame #UCLFinal","display_text_range":[0,175],"entities":{"hashtags":[{"text":"InnovateYourGame","indices":[148,165]},{"text":"UCLFinal","indices":[166,175]}],"urls":[],"user_mentions":[{"screen_name":"Nissan_ESP","name":"Nissan Espa\u00f1a","id":291745825,"id_str":"291745825","indices":[136,147]}],"symbols":[]}},"quote_count":0,"reply_count":6,"retweet_count":17,"favorite_count":20,"entities":{"hashtags":[],"urls":[{"url":"https:\/\/t.co\/IsXK9nmVsx","expanded_url":"https:\/\/twitter.com\/i\/web\/status\/1000029812684148737","display_url":"twitter.com\/i\/web\/status\/1\u2026","indices":[117,140]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"filter_level":"low","lang":"es"},"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[],"urls":[],"user_mentions":[{"screen_name":"madridismoreaI_","name":"Madridismo Real\u2122","id":188481426,"id_str":"188481426","indices":[3,19]}],"symbols":[]},"favorited":false,"retweeted":false,"filter_level":"low","lang":"es","timestamp_ms":"1527340711093"} 5 | {"created_at":"Sat May 26 13:18:31 +0000 2018","id":1000365564517388288,"id_str":"1000365564517388288","text":"RT @ECG_Unofficial: We will like to categorically state our sincere commitment to ensure you all watch today's #UCLfinal\n\nSo, if you live a\u2026","source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":902735000445095938,"id_str":"902735000445095938","name":"Kwame-Nat","screen_name":"Kwame_Nat1","location":null,"url":null,"description":"**Entrepreneur**Manager**Believer of the Gospel**Liverpool fc 
crazy**Fashionista**","translator_type":"none","protected":false,"verified":false,"followers_count":364,"friends_count":657,"listed_count":0,"favourites_count":5274,"statuses_count":2817,"created_at":"Wed Aug 30 03:29:30 +0000 2017","utc_offset":null,"time_zone":null,"geo_enabled":true,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"F5F8FA","profile_background_image_url":"","profile_background_image_url_https":"","profile_background_tile":false,"profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/986756749918195712\/d05oMAYY_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/986756749918195712\/d05oMAYY_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/902735000445095938\/1515445616","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"Sat May 26 11:14:44 +0000 2018","id":1000334414465945602,"id_str":"1000334414465945602","text":"We will like to categorically state our sincere commitment to ensure you all watch today's #UCLfinal\n\nSo, if you li\u2026 https:\/\/t.co\/4p8xnk7yO7","source":"\u003ca href=\"http:\/\/twitter.com\/download\/iphone\" rel=\"nofollow\"\u003eTwitter for iPhone\u003c\/a\u003e","truncated":true,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":2411233145,"id_str":"2411233145","name":"ECG","screen_name":"ECG_Unofficial","location":"Ghana","url":null,"description":"Electricity Company of Ghana\n\n #parody","translator_type":"none","protected":false,"verified":false,"followers_count":5187,"friends_count":423,"listed_count":6,"favourites_count":266,"statuses_count":629,"created_at":"Fri Mar 14 22:08:48 +0000 2014","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/445986995089977345\/drrxMHnd_normal.jpeg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/445986995089977345\/drrxMHnd_normal.jpeg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/2411233145\/1395166558","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"extended_tweet":{"full_text":"We will like to categorically state our sincere commitment to ensure you all watch today's #UCLfinal\n\nSo, if you live around Madina, kindly visit your friends at East Legon. 
If you live around Haatso, Ecomog, visit your friends at West Legon.\n\nECG, giving you a leg to stand on.","display_text_range":[0,278],"entities":{"hashtags":[{"text":"UCLfinal","indices":[91,100]}],"urls":[],"user_mentions":[],"symbols":[]}},"quote_count":15,"reply_count":5,"retweet_count":42,"favorite_count":26,"entities":{"hashtags":[{"text":"UCLfinal","indices":[91,100]}],"urls":[{"url":"https:\/\/t.co\/4p8xnk7yO7","expanded_url":"https:\/\/twitter.com\/i\/web\/status\/1000334414465945602","display_url":"twitter.com\/i\/web\/status\/1\u2026","indices":[117,140]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"filter_level":"low","lang":"en"},"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"UCLfinal","indices":[111,120]}],"urls":[],"user_mentions":[{"screen_name":"ECG_Unofficial","name":"ECG","id":2411233145,"id_str":"2411233145","indices":[3,18]}],"symbols":[]},"favorited":false,"retweeted":false,"filter_level":"low","lang":"en","timestamp_ms":"1527340711131"} 6 | {"created_at":"Sat May 26 13:18:31 +0000 2018","id":1000365564781580289,"id_str":"1000365564781580289","text":"Real Madrid.... LETS GO!!!!!!!!!!!!!!!!!!!!!!! I am not gonna be too arrogant predicting we gonna win by 5-0, so I\u2026 https:\/\/t.co\/LicM1rwwAH","source":"\u003ca href=\"http:\/\/twitter.com\" rel=\"nofollow\"\u003eTwitter Web Client\u003c\/a\u003e","truncated":true,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":240672622,"id_str":"240672622","name":"NOSA TOPNOTCH","screen_name":"nosatopnotch","location":"Everywhere","url":null,"description":"#Arsenal #RealMadrid Sports, Entertainment And Politics Opiner","translator_type":"none","protected":false,"verified":false,"followers_count":1322,"friends_count":1229,"listed_count":7,"favourites_count":6,"statuses_count":5984,"created_at":"Thu Jan 20 13:32:14 +0000 2011","utc_offset":null,"time_zone":null,"geo_enabled":true,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/pbs.twimg.com\/profile_background_images\/485877597\/60512_474757379282_577519282_6700664_4045541_n__1_.jpg","profile_background_image_url_https":"https:\/\/pbs.twimg.com\/profile_background_images\/485877597\/60512_474757379282_577519282_6700664_4045541_n__1_.jpg","profile_background_tile":true,"profile_link_color":"0084B4","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/892097459794718723\/ghns-S6f_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/892097459794718723\/ghns-S6f_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/240672622\/1495108735","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"extended_tweet":{"full_text":"Real Madrid.... LETS GO!!!!!!!!!!!!!!!!!!!!!!! I am not gonna be too arrogant predicting we gonna win by 5-0, so I am humbly gonna predict a 4-0 Real Madrid win... 
#UCLfinal","display_text_range":[0,174],"entities":{"hashtags":[{"text":"UCLfinal","indices":[165,174]}],"urls":[],"user_mentions":[],"symbols":[]}},"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[],"urls":[{"url":"https:\/\/t.co\/LicM1rwwAH","expanded_url":"https:\/\/twitter.com\/i\/web\/status\/1000365564781580289","display_url":"twitter.com\/i\/web\/status\/1\u2026","indices":[117,140]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"filter_level":"low","lang":"en","timestamp_ms":"1527340711194"} 7 | {"created_at":"Sat May 26 13:18:31 +0000 2018","id":1000365565125513217,"id_str":"1000365565125513217","text":"RT @ChampionsLeague: Klopp \ud83e\udd23\ud83e\udd23\ud83e\udd23\n\n#UCLfinal https:\/\/t.co\/Ve5eyfa1wV","source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":311037764,"id_str":"311037764","name":"Sredny","screen_name":"SredXNY","location":"Rubio York-Venezuela","url":null,"description":"A real engineer.\nWinter is coming \u2744\ufe0f\nLove all, trust a few, do wrong to none","translator_type":"none","protected":false,"verified":false,"followers_count":238,"friends_count":230,"listed_count":1,"favourites_count":44,"statuses_count":23289,"created_at":"Sat Jun 04 19:53:17 +0000 2011","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":"es","contributors_enabled":false,"is_translator":false,"profile_background_color":"131516","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme14\/bg.gif","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme14\/bg.gif","profile_background_tile":true,"profile_link_color":"E81C4F","profile_sidebar_border_color":"000000","profile_sidebar_fill_color":"EFEFEF","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/461713211717189632\/MHz2oajj_normal.jpeg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/461713211717189632\/MHz2oajj_normal.jpeg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/311037764\/1398792265","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"Fri May 25 16:08:24 +0000 2018","id":1000045931692216320,"id_str":"1000045931692216320","text":"Klopp \ud83e\udd23\ud83e\udd23\ud83e\udd23\n\n#UCLfinal https:\/\/t.co\/Ve5eyfa1wV","display_text_range":[0,20],"source":"\u003ca href=\"https:\/\/studio.twitter.com\" rel=\"nofollow\"\u003eMedia Studio\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":627673190,"id_str":"627673190","name":"#UCLfinal","screen_name":"ChampionsLeague","location":"Nyon, Vaud","url":"http:\/\/uefa.com\/uefachampionsleague","description":"The official home of the #UCL on Twitter. 
Spanish: @LigadeCampeones U19: @UEFAYouthLeague YouTube: http:\/\/uefa.tv","translator_type":"none","protected":false,"verified":true,"followers_count":21466722,"friends_count":472,"listed_count":18487,"favourites_count":1222,"statuses_count":55552,"created_at":"Thu Jul 05 19:43:40 +0000 2012","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"022330","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"1B95E0","profile_sidebar_border_color":"FFFFFF","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/616969183948054528\/mF1Oxcly_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/616969183948054528\/mF1Oxcly_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/627673190\/1525295441","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"quote_count":589,"reply_count":197,"retweet_count":5024,"favorite_count":17620,"entities":{"hashtags":[{"text":"UCLfinal","indices":[11,20]}],"urls":[],"user_mentions":[],"symbols":[],"media":[{"id":1000045584844238848,"id_str":"1000045584844238848","indices":[21,44],"additional_media_info":{"title":"","description":"","embeddable":true,"monetizable":false},"media_url":"http:\/\/pbs.twimg.com\/amplify_video_thumb\/1000045584844238848\/img\/cWpvax0XO4W3KcxD.jpg","media_url_https":"https:\/\/pbs.twimg.com\/amplify_video_thumb\/1000045584844238848\/img\/cWpvax0XO4W3KcxD.jpg","url":"https:\/\/t.co\/Ve5eyfa1wV","display_url":"pic.twitter.com\/Ve5eyfa1wV","expanded_url":"https:\/\/twitter.com\/ChampionsLeague\/status\/1000045931692216320\/video\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":680,"h":680,"resize":"fit"},"medium":{"w":720,"h":720,"resize":"fit"},"large":{"w":720,"h":720,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":1000045584844238848,"id_str":"1000045584844238848","indices":[21,44],"additional_media_info":{"title":"","description":"","embeddable":true,"monetizable":false},"media_url":"http:\/\/pbs.twimg.com\/amplify_video_thumb\/1000045584844238848\/img\/cWpvax0XO4W3KcxD.jpg","media_url_https":"https:\/\/pbs.twimg.com\/amplify_video_thumb\/1000045584844238848\/img\/cWpvax0XO4W3KcxD.jpg","url":"https:\/\/t.co\/Ve5eyfa1wV","display_url":"pic.twitter.com\/Ve5eyfa1wV","expanded_url":"https:\/\/twitter.com\/ChampionsLeague\/status\/1000045931692216320\/video\/1","type":"video","video_info":{"aspect_ratio":[1,1],"duration_millis":26960,"variants":[{"bitrate":1280000,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/amplify_video\/1000045584844238848\/vid\/720x720\/MZfL99kQoTwl-QPT.mp4?tag=2"},{"content_type":"application\/x-mpegURL","url":"https:\/\/video.twimg.com\/amplify_video\/1000045584844238848\/pl\/AE7a0nTqUfg5fN-e.m3u8?tag=2"},{"bitrate":832000,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/amplify_video\/1000045584844238848\/vid\/480x480\/CY1dP8NkFyV4QrQj.mp4?tag=2"},{"bitrate":320000,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/amplify_video\/1000045584844238848\/vid\/240x240\/Rz71TlI7j_jHg-v5.mp4?tag=2"}]},"sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":680,"h":680,"resize":"fit"},"medium":{"w":720,"h":720,"resize":"fit"},"large":{"w":720,"h":720,"resize":"fit"}}}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"sv"},"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"UCLfinal","indices":[32,41]}],"urls":[],"user_mentions":[{"screen_name":"ChampionsLeague","name":"#UCLfinal","id":627673190,"id_str":"627673190","indices":[3,19]}],"symbols":[],"media":[{"id":1000045584844238848,"id_str":"1000045584844238848","indices":[42,65],"additional_media_info":{"title":"","description":"","embeddable":true,"monetizable":false},"media_url":"http:\/\/pbs.twimg.com\/amplify_video_thumb\/1000045584844238848\/img\/cWpvax0XO4W3KcxD.jpg","media_url_https":"https:\/\/pbs.twimg.com\/amplify_video_thumb\/1000045584844238848\/img\/cWpvax0XO4W3KcxD.jpg","url":"https:\/\/t.co\/Ve5eyfa1wV","display_url":"pic.twitter.com\/Ve5eyfa1wV","expanded_url":"https:\/\/twitter.com\/ChampionsLeague\/status\/1000045931692216320\/video\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":680,"h":680,"resize":"fit"},"medium":{"w":720,"h":720,"resize":"fit"},"large":{"w":720,"h":720,"resize":"fit"}},"source_status_id":1000045931692216320,"source_status_id_str":"1000045931692216320","source_user_id":627673190,"source_user_id_str":"627673190"}]},"extended_entities":{"media":[{"id":1000045584844238848,"id_str":"1000045584844238848","indices":[42,65],"additional_media_info":{"title":"","description":"","embeddable":true,"monetizable":false},"media_url":"http:\/\/pbs.twimg.com\/amplify_video_thumb\/1000045584844238848\/img\/cWpvax0XO4W3KcxD.jpg","media_url_https":"https:\/\/pbs.twimg.com\/amplify_video_thumb\/1000045584844238848\/img\/cWpvax0XO4W3KcxD.jpg","url":"https:\/\/t.co\/Ve5eyfa1wV","display_url":"pic.twitter.com\/Ve5eyfa1wV","expanded_url":"https:\/\/twitter.com\/ChampionsLeague\/status\/1000045931692216320\/video\/1","type":"video","video_info":{"aspect_ratio":[1,1],"duration_millis":26960,"variants":[{"bitrate":1280000,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/amplify_video\/1000045584844238848\/vid\/720x720\/MZfL99kQoTwl-QPT.mp4?tag=2"},{"content_type":"application\/x-mpegURL","url":"https:\/\/video.twimg.com\/amplify_video\/1000045584844238848\/pl\/AE7a0nTqUfg5fN-e.m3u8?tag=2"},{"bitrate":832000,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/amplify_video\/1000045584844238848\/vid\/480x480\/CY1dP8NkFyV4QrQj.mp4?tag=2"},{"bitrate":320000,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/amplify_video\/1000045584844238848\/vid\/240x240\/Rz71TlI7j_jHg-v5.mp4?tag=2"}]},"sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":680,"h":680,"resize":"fit"},"medium":{"w":720,"h":720,"resize":"fit"},"large":{"w":720,"h":720,"resize":"fit"}},"source_status_id":1000045931692216320,"source_status_id_str":"1000045931692216320","source_user_id":627673190,"source_user_id_str":"627673190"}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"sv","timestamp_ms":"1527340711276"}
8 | {"created_at":"Sat May 26 13:18:31 +0000 2018","id":1000365565154742272,"id_str":"1000365565154742272","text":"RT @YNWA_Claire: Please? \ud83d\ude4f\ud83c\udffc\n\n#UCLFinal https:\/\/t.co\/nFeiI2C75z","source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":277019564,"id_str":"277019564","name":"kenyan kop","screen_name":"Kenyankop","location":"Mombasakenya","url":null,"description":"Join team seeing is believing #LFChackingmanuseless* am hot coz am red, u hate coz u not! #Liverpool is life..my blood is red* LFC family. #anything Liverpool","translator_type":"none","protected":false,"verified":false,"followers_count":1423,"friends_count":1875,"listed_count":5,"favourites_count":21417,"statuses_count":17163,"created_at":"Mon Apr 04 14:55:41 +0000 2011","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/991539061922914305\/-cnIIN8f_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/991539061922914305\/-cnIIN8f_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/277019564\/1516067027","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"Sat May 26 13:13:45 +0000 2018","id":1000364366150144000,"id_str":"1000364366150144000","text":"Please? \ud83d\ude4f\ud83c\udffc\n\n#UCLFinal https:\/\/t.co\/nFeiI2C75z","display_text_range":[0,21],"source":"\u003ca href=\"http:\/\/twitter.com\/download\/iphone\" rel=\"nofollow\"\u003eTwitter for iPhone\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":932362455149408257,"id_str":"932362455149408257","name":"Claire","screen_name":"YNWA_Claire","location":null,"url":null,"description":null,"translator_type":"none","protected":false,"verified":false,"followers_count":8916,"friends_count":8242,"listed_count":17,"favourites_count":10976,"statuses_count":6260,"created_at":"Sun Nov 19 21:38:26 +0000 2017","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"000000","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"E81C4F","profile_sidebar_border_color":"000000","profile_sidebar_fill_color":"000000","profile_text_color":"000000","profile_use_background_image":false,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/987674593430253568\/nrj2nK-r_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/987674593430253568\/nrj2nK-r_normal.jpg","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":1,"favorite_count":18,"entities":{"hashtags":[{"text":"UCLFinal","indices":[12,21]}],"urls":[],"user_mentions":[],"symbols":[],"media":[{"id":1000364360131280896,"id_str":"1000364360131280896","indices":[22,45],"media_url":"http:\/\/pbs.twimg.com\/media\/DeICFduWAAARC6O.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/DeICFduWAAARC6O.jpg","url":"https:\/\/t.co\/nFeiI2C75z","display_url":"pic.twitter.com\/nFeiI2C75z","expanded_url":"https:\/\/twitter.com\/YNWA_Claire\/status\/1000364366150144000\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":736,"h":907,"resize":"fit"},"small":{"w":552,"h":680,"resize":"fit"},"medium":{"w":736,"h":907,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":1000364360131280896,"id_str":"1000364360131280896","indices":[22,45],"media_url":"http:\/\/pbs.twimg.com\/media\/DeICFduWAAARC6O.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/DeICFduWAAARC6O.jpg","url":"https:\/\/t.co\/nFeiI2C75z","display_url":"pic.twitter.com\/nFeiI2C75z","expanded_url":"https:\/\/twitter.com\/YNWA_Claire\/status\/1000364366150144000\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":736,"h":907,"resize":"fit"},"small":{"w":552,"h":680,"resize":"fit"},"medium":{"w":736,"h":907,"resize":"fit"}}}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en"},"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"UCLFinal","indices":[29,38]}],"urls":[],"user_mentions":[{"screen_name":"YNWA_Claire","name":"Claire","id":932362455149408257,"id_str":"932362455149408257","indices":[3,15]}],"symbols":[],"media":[{"id":1000364360131280896,"id_str":"1000364360131280896","indices":[39,62],"media_url":"http:\/\/pbs.twimg.com\/media\/DeICFduWAAARC6O.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/DeICFduWAAARC6O.jpg","url":"https:\/\/t.co\/nFeiI2C75z","display_url":"pic.twitter.com\/nFeiI2C75z","expanded_url":"https:\/\/twitter.com\/YNWA_Claire\/status\/1000364366150144000\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":736,"h":907,"resize":"fit"},"small":{"w":552,"h":680,"resize":"fit"},"medium":{"w":736,"h":907,"resize":"fit"}},"source_status_id":1000364366150144000,"source_status_id_str":"1000364366150144000","source_user_id":932362455149408257,"source_user_id_str":"932362455149408257"}]},"extended_entities":{"media":[{"id":1000364360131280896,"id_str":"1000364360131280896","indices":[39,62],"media_url":"http:\/\/pbs.twimg.com\/media\/DeICFduWAAARC6O.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/DeICFduWAAARC6O.jpg","url":"https:\/\/t.co\/nFeiI2C75z","display_url":"pic.twitter.com\/nFeiI2C75z","expanded_url":"https:\/\/twitter.com\/YNWA_Claire\/status\/1000364366150144000\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":736,"h":907,"resize":"fit"},"small":{"w":552,"h":680,"resize":"fit"},"medium":{"w":736,"h":907,"resize":"fit"}},"source_status_id":1000364366150144000,"source_status_id_str":"1000364366150144000","source_user_id":932362455149408257,"source_user_id_str":"932362455149408257"}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1527340711283"}
9 | {"created_at":"Sat May 26 13:18:31 +0000 2018","id":1000365565314322432,"id_str":"1000365565314322432","text":"RT @ChampionsLeague: Two European giants go head-to-head.\n\nWho will win the #UCLfinal? \n\n#LaysUnited https:\/\/t.co\/uBu5Fe7M6S","source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":957644286350315521,"id_str":"957644286350315521","name":"Abdulai Suburu","screen_name":"SuburuAbdulai","location":"Wa, Upper West Region. Ghana","url":null,"description":"A teacher by profession,Player of Biyad 77 stars, sports caster for Pupeli 92.7fm, reporter for @Ghanapremierleaguelive.com, Liverpool Football Club Fan","translator_type":"none","protected":false,"verified":false,"followers_count":427,"friends_count":2999,"listed_count":2,"favourites_count":31400,"statuses_count":33042,"created_at":"Sun Jan 28 15:59:24 +0000 2018","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"F5F8FA","profile_background_image_url":"","profile_background_image_url_https":"","profile_background_tile":false,"profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/985207770751361025\/EGrD2AMN_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/985207770751361025\/EGrD2AMN_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/957644286350315521\/1517162274","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"Sat May 26 13:00:01 +0000 2018","id":1000360910077083650,"id_str":"1000360910077083650","text":"Two European giants go head-to-head.\n\nWho will win the #UCLfinal? \n\n#LaysUnited https:\/\/t.co\/uBu5Fe7M6S","display_text_range":[0,79],"source":"\u003ca href=\"https:\/\/studio.twitter.com\" rel=\"nofollow\"\u003eMedia Studio\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":627673190,"id_str":"627673190","name":"#UCLfinal","screen_name":"ChampionsLeague","location":"Nyon, Vaud","url":"http:\/\/uefa.com\/uefachampionsleague","description":"The official home of the #UCL on Twitter. Spanish: @LigadeCampeones U19: @UEFAYouthLeague YouTube: http:\/\/uefa.tv","translator_type":"none","protected":false,"verified":true,"followers_count":21466722,"friends_count":472,"listed_count":18487,"favourites_count":1222,"statuses_count":55552,"created_at":"Thu Jul 05 19:43:40 +0000 2012","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"022330","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"1B95E0","profile_sidebar_border_color":"FFFFFF","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/616969183948054528\/mF1Oxcly_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/616969183948054528\/mF1Oxcly_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/627673190\/1525295441","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"quote_count":16,"reply_count":55,"retweet_count":158,"favorite_count":707,"entities":{"hashtags":[{"text":"UCLfinal","indices":[55,64]},{"text":"LaysUnited","indices":[68,79]}],"urls":[],"user_mentions":[],"symbols":[],"media":[{"id":998559751943475201,"id_str":"998559751943475201","indices":[80,103],"additional_media_info":{"title":"","description":"","embeddable":true,"monetizable":false},"media_url":"http:\/\/pbs.twimg.com\/amplify_video_thumb\/998559751943475201\/img\/ReBCU3oacDdcb4a-.jpg","media_url_https":"https:\/\/pbs.twimg.com\/amplify_video_thumb\/998559751943475201\/img\/ReBCU3oacDdcb4a-.jpg","url":"https:\/\/t.co\/uBu5Fe7M6S","display_url":"pic.twitter.com\/uBu5Fe7M6S","expanded_url":"https:\/\/twitter.com\/ChampionsLeague\/status\/1000360910077083650\/video\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":680,"h":680,"resize":"fit"},"medium":{"w":720,"h":720,"resize":"fit"},"large":{"w":720,"h":720,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":998559751943475201,"id_str":"998559751943475201","indices":[80,103],"additional_media_info":{"title":"","description":"","embeddable":true,"monetizable":false},"media_url":"http:\/\/pbs.twimg.com\/amplify_video_thumb\/998559751943475201\/img\/ReBCU3oacDdcb4a-.jpg","media_url_https":"https:\/\/pbs.twimg.com\/amplify_video_thumb\/998559751943475201\/img\/ReBCU3oacDdcb4a-.jpg","url":"https:\/\/t.co\/uBu5Fe7M6S","display_url":"pic.twitter.com\/uBu5Fe7M6S","expanded_url":"https:\/\/twitter.com\/ChampionsLeague\/status\/1000360910077083650\/video\/1","type":"video","video_info":{"aspect_ratio":[1,1],"duration_millis":17480,"variants":[{"bitrate":1280000,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/amplify_video\/998559751943475201\/vid\/720x720\/xfZxMeArzDuQKXbx.mp4?tag=2"},{"content_type":"application\/x-mpegURL","url":"https:\/\/video.twimg.com\/amplify_video\/998559751943475201\/pl\/1PFEtaQXzqk9W9p7.m3u8?tag=2"},{"bitrate":320000,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/amplify_video\/998559751943475201\/vid\/240x240\/Aj2wyqDf0sXFgEWU.mp4?tag=2"},{"bitrate":832000,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/amplify_video\/998559751943475201\/vid\/480x480\/CJMKg9ccNg8bsWwa.mp4?tag=2"}]},"sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":680,"h":680,"resize":"fit"},"medium":{"w":720,"h":720,"resize":"fit"},"large":{"w":720,"h":720,"resize":"fit"}}}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en"},"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"UCLfinal","indices":[76,85]},{"text":"LaysUnited","indices":[89,100]}],"urls":[],"user_mentions":[{"screen_name":"ChampionsLeague","name":"#UCLfinal","id":627673190,"id_str":"627673190","indices":[3,19]}],"symbols":[],"media":[{"id":998559751943475201,"id_str":"998559751943475201","indices":[101,124],"additional_media_info":{"title":"","description":"","embeddable":true,"monetizable":false},"media_url":"http:\/\/pbs.twimg.com\/amplify_video_thumb\/998559751943475201\/img\/ReBCU3oacDdcb4a-.jpg","media_url_https":"https:\/\/pbs.twimg.com\/amplify_video_thumb\/998559751943475201\/img\/ReBCU3oacDdcb4a-.jpg","url":"https:\/\/t.co\/uBu5Fe7M6S","display_url":"pic.twitter.com\/uBu5Fe7M6S","expanded_url":"https:\/\/twitter.com\/ChampionsLeague\/status\/1000360910077083650\/video\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":680,"h":680,"resize":"fit"},"medium":{"w":720,"h":720,"resize":"fit"},"large":{"w":720,"h":720,"resize":"fit"}},"source_status_id":1000360910077083650,"source_status_id_str":"1000360910077083650","source_user_id":627673190,"source_user_id_str":"627673190"}]},"extended_entities":{"media":[{"id":998559751943475201,"id_str":"998559751943475201","indices":[101,124],"additional_media_info":{"title":"","description":"","embeddable":true,"monetizable":false},"media_url":"http:\/\/pbs.twimg.com\/amplify_video_thumb\/998559751943475201\/img\/ReBCU3oacDdcb4a-.jpg","media_url_https":"https:\/\/pbs.twimg.com\/amplify_video_thumb\/998559751943475201\/img\/ReBCU3oacDdcb4a-.jpg","url":"https:\/\/t.co\/uBu5Fe7M6S","display_url":"pic.twitter.com\/uBu5Fe7M6S","expanded_url":"https:\/\/twitter.com\/ChampionsLeague\/status\/1000360910077083650\/video\/1","type":"video","video_info":{"aspect_ratio":[1,1],"duration_millis":17480,"variants":[{"bitrate":1280000,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/amplify_video\/998559751943475201\/vid\/720x720\/xfZxMeArzDuQKXbx.mp4?tag=2"},{"content_type":"application\/x-mpegURL","url":"https:\/\/video.twimg.com\/amplify_video\/998559751943475201\/pl\/1PFEtaQXzqk9W9p7.m3u8?tag=2"},{"bitrate":320000,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/amplify_video\/998559751943475201\/vid\/240x240\/Aj2wyqDf0sXFgEWU.mp4?tag=2"},{"bitrate":832000,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/amplify_video\/998559751943475201\/vid\/480x480\/CJMKg9ccNg8bsWwa.mp4?tag=2"}]},"sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":680,"h":680,"resize":"fit"},"medium":{"w":720,"h":720,"resize":"fit"},"large":{"w":720,"h":720,"resize":"fit"}},"source_status_id":1000360910077083650,"source_status_id_str":"1000360910077083650","source_user_id":627673190,"source_user_id_str":"627673190"}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1527340711321"}
10 | {"created_at":"Sat May 26 13:18:31 +0000 2018","id":1000365565708513281,"id_str":"1000365565708513281","text":"RT @LaLigaArab: \u0633\u064a\u062f \u0627\u0644\u0644\u0627\u0639\u0628\u064a\u0646 \ud83d\udd1d @Cristiano \ud83d\udd1d\n\n#UCLfinal https:\/\/t.co\/M33kYByzW6","source":"\u003ca href=\"http:\/\/twitter.com\/download\/iphone\" rel=\"nofollow\"\u003eTwitter for iPhone\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":1672941752,"id_str":"1672941752","name":"medo_hazazi9 N18\ud83d\udc9b","screen_name":"rory4000","location":null,"url":null,"description":"\u0645\u0627\u064a\u062d\u062f\u062f \u0639\u0634\u0642\u0646\u0627 \u0641\u0648\u0632 \u0648\u062e\u0633\u0627\u0631\u0629..\u0645\u0646 \u062e\u0644\u0642\u0646\u0627 \u0627\u0644\u0644\u0647 \u0646\u062d\u0628 \u0627\u0644\u0625\u062a\u062d\u0627\u062f \u0639\u0627\u0634\u0642 \u0648\u062f\u0627\u0639\u0645 \u0644\u0640 \u0627\u0644\u0635\u0642\u0631 \u0646\u0627\u064a\u0641 \u0647\u0632\u0627\u0632\u064a snap : medo_hazazi9 @ittihad @realmadrid @hazazi16","translator_type":"none","protected":false,"verified":false,"followers_count":812,"friends_count":881,"listed_count":2,"favourites_count":492,"statuses_count":10528,"created_at":"Thu Aug 15 11:18:14 +0000 2013","utc_offset":null,"time_zone":null,"geo_enabled":true,"lang":"ar","contributors_enabled":false,"is_translator":false,"profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/996005569965297664\/J3nWQc0c_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/996005569965297664\/J3nWQc0c_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/1672941752\/1527011739","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"Fri May 25 22:25:00 +0000 2018","id":1000140704604045312,"id_str":"1000140704604045312","text":"\u0633\u064a\u062f \u0627\u0644\u0644\u0627\u0639\u0628\u064a\u0646 \ud83d\udd1d @Cristiano \ud83d\udd1d\n\n#UCLfinal https:\/\/t.co\/M33kYByzW6","display_text_range":[0,38],"source":"\u003ca href=\"https:\/\/about.twitter.com\/products\/tweetdeck\" rel=\"nofollow\"\u003eTweetDeck\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":827488482721402884,"id_str":"827488482721402884","name":"LaLiga","screen_name":"LaLigaArab","location":null,"url":null,"description":"\u0627\u0644\u062d\u0633\u0627\u0628 \u0627\u0644\u0631\u0633\u0645\u064a \u0644\u0644\u062f\u0648\u0631\u064a \u0627\u0644\u0625\u0633\u0628\u0627\u0646\u064a \"\u0644\u0627\u0644\u064a\u063a\u0627\" \u0628\u0627\u0644\u0639\u0631\u0628\u064a\u0629 http:\/\/www.laliga.es | @LaLigaFRA | @LaLigaEN | @LaLiga http:\/\/facebook.com\/LaLiga | http:\/\/Instagram.com\/laliga","translator_type":"none","protected":false,"verified":true,"followers_count":130432,"friends_count":79,"listed_count":381,"favourites_count":2604,"statuses_count":7596,"created_at":"Fri Feb 03 12:06:43 +0000 2017","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":"es","contributors_enabled":false,"is_translator":false,"profile_background_color":"F5F8FA","profile_background_image_url":"","profile_background_image_url_https":"","profile_background_tile":false,"profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/827547276797173763\/-Y8Wb9qw_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/827547276797173763\/-Y8Wb9qw_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/827488482721402884\/1526857108","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"quote_count":7,"reply_count":69,"retweet_count":182,"favorite_count":559,"entities":{"hashtags":[{"text":"UCLfinal","indices":[29,38]}],"urls":[],"user_mentions":[{"screen_name":"Cristiano","name":"Cristiano Ronaldo","id":155659213,"id_str":"155659213","indices":[15,25]}],"symbols":[],"media":[{"id":999637392955863040,"id_str":"999637392955863040","indices":[39,62],"media_url":"http:\/\/pbs.twimg.com\/media\/Dd9s6Z9U8AAUkuA.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/Dd9s6Z9U8AAUkuA.jpg","url":"https:\/\/t.co\/M33kYByzW6","display_url":"pic.twitter.com\/M33kYByzW6","expanded_url":"https:\/\/twitter.com\/LaLigaArab\/status\/1000140704604045312\/photo\/1","type":"photo","sizes":{"large":{"w":2048,"h":1536,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":1200,"h":900,"resize":"fit"},"small":{"w":680,"h":510,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":999637392955863040,"id_str":"999637392955863040","indices":[39,62],"media_url":"http:\/\/pbs.twimg.com\/media\/Dd9s6Z9U8AAUkuA.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/Dd9s6Z9U8AAUkuA.jpg","url":"https:\/\/t.co\/M33kYByzW6","display_url":"pic.twitter.com\/M33kYByzW6","expanded_url":"https:\/\/twitter.com\/LaLigaArab\/status\/1000140704604045312\/photo\/1","type":"photo","sizes":{"large":{"w":2048,"h":1536,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":1200,"h":900,"resize":"fit"},"small":{"w":680,"h":510,"resize":"fit"}}}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"ar"},"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"UCLfinal","indices":[45,54]}],"urls":[],"user_mentions":[{"screen_name":"LaLigaArab","name":"LaLiga","id":827488482721402884,"id_str":"827488482721402884","indices":[3,14]},{"screen_name":"Cristiano","name":"Cristiano Ronaldo","id":155659213,"id_str":"155659213","indices":[31,41]}],"symbols":[],"media":[{"id":999637392955863040,"id_str":"999637392955863040","indices":[55,78],"media_url":"http:\/\/pbs.twimg.com\/media\/Dd9s6Z9U8AAUkuA.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/Dd9s6Z9U8AAUkuA.jpg","url":"https:\/\/t.co\/M33kYByzW6","display_url":"pic.twitter.com\/M33kYByzW6","expanded_url":"https:\/\/twitter.com\/LaLigaArab\/status\/1000140704604045312\/photo\/1","type":"photo","sizes":{"large":{"w":2048,"h":1536,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":1200,"h":900,"resize":"fit"},"small":{"w":680,"h":510,"resize":"fit"}},"source_status_id":1000140704604045312,"source_status_id_str":"1000140704604045312","source_user_id":827488482721402884,"source_user_id_str":"827488482721402884"}]},"extended_entities":{"media":[{"id":999637392955863040,"id_str":"999637392955863040","indices":[55,78],"media_url":"http:\/\/pbs.twimg.com\/media\/Dd9s6Z9U8AAUkuA.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/Dd9s6Z9U8AAUkuA.jpg","url":"https:\/\/t.co\/M33kYByzW6","display_url":"pic.twitter.com\/M33kYByzW6","expanded_url":"https:\/\/twitter.com\/LaLigaArab\/status\/1000140704604045312\/photo\/1","type":"photo","sizes":{"large":{"w":2048,"h":1536,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":1200,"h":900,"resize":"fit"},"small":{"w":680,"h":510,"resize":"fit"}},"source_status_id":1000140704604045312,"source_status_id_str":"1000140704604045312","source_user_id":827488482721402884,"source_user_id_str":"827488482721402884"}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"ar","timestamp_ms":"1527340711415"}
--------------------------------------------------------------------------------