├── .gitignore ├── LICENSE ├── README.md ├── contrib ├── iso639-1 │ ├── .gitignore │ ├── convert.py │ └── fetch.sh └── rrdg │ └── DIT │ ├── .gitignore │ └── fetch.sh ├── db ├── 00-tag2domain-db-init │ ├── init-tag2domain-tables.sh │ └── sql │ │ ├── 30-taxonomy.sql │ │ ├── 50-tags.sql │ │ └── 70-taxonomy_tag_val.sql └── db_master_script.sh ├── docker-compose.all-in-one-kafka.yml ├── docker-compose.all-in-one.yml ├── docker-compose.external-db.yml ├── docker-compose.test-db.yml ├── docker ├── all-in-one-demo │ ├── db │ │ ├── 50-init-tag2domain-db.sh │ │ └── all-in-one-demo-sql │ │ │ ├── data.sql │ │ │ ├── intersections.sql │ │ │ ├── mock_registry_data.sql │ │ │ ├── mock_registry_schema.sql │ │ │ └── tag2domain_glue_tables.sql │ ├── example.env │ └── tag2domain_intxn_tables.cfg └── external-db │ └── tag2domain.cfg.example ├── docs └── advanced_db_config.md ├── examples ├── db │ └── db.config.sh.example ├── measurements │ ├── README.md │ ├── measurement_color_empty.json │ ├── measurement_color_rgb_red.json │ ├── measurement_flavor_empty.json │ ├── measurement_flavor_salty_strongly.json │ └── submit_measurement-kafka.sh └── taxonomies │ └── example_taxonomy.yml ├── msm2tag2domain ├── app │ ├── msm2tag2domain.cfg │ └── msm2tag2domain.py └── docker │ ├── Dockerfile │ └── msm2tag2domain.cfg.example ├── py_tag2domain ├── __init__.py ├── db.py ├── db_statements.py ├── exceptions.py ├── msm2tags.py ├── requirements.txt ├── schema │ ├── measurement.json │ └── measurement.py └── util.py ├── scripts ├── db │ ├── create_glue.sh │ ├── create_glue.sql │ ├── create_intersection.sql │ ├── create_intersection_table.sh │ ├── create_intxn_table_config.sh │ └── intersection_table_config.template └── util │ └── taxonomy_postgresql_inserts.py ├── static ├── cef_logo.png ├── components.svg └── schema.svg ├── tag2domain_api ├── Dockerfile ├── Dockerfile.msm2tag ├── app │ ├── __init__.py │ ├── api_v1 │ │ ├── __init__.py │ │ ├── api.py │ │ └── endpoints │ │ │ ├── __init__.py │ │ │ ├── 
bydomain.py │ │ │ ├── domains.py │ │ │ ├── filters.py │ │ │ ├── meta.py │ │ │ ├── msm2tag.py │ │ │ ├── stats.py │ │ │ └── util.py │ ├── common │ │ ├── __init__.py │ │ ├── meta.py │ │ └── test.py │ ├── dotenv.sample.txt │ ├── main.py │ └── util │ │ ├── __init__.py │ │ ├── config.py │ │ ├── db.py │ │ ├── logging.py │ │ └── models.py ├── benchmark │ ├── functions.js │ └── test.yaml ├── requirements.txt └── secrets.env.example └── tests ├── __init__.py ├── config ├── db.cfg.ci └── db.cfg.example ├── db_mock_data ├── 50-init-tag2domain-db.sh ├── basic │ ├── tag2domain_db_test_data.sql │ ├── tag2domain_db_test_glue_views.sql │ └── tag2domain_db_test_schema.sql └── test_cases │ ├── tags_categories.sql │ └── version_tags.sql ├── env.example ├── tests_py_tag2domain ├── __init__.py ├── db_test_classes.py ├── test_db.py ├── test_msm2tags.py └── test_util.py ├── tests_tag2domain_api ├── __init__.py ├── db_test_classes.py ├── test_bydomain_endpoints.py ├── test_db.py ├── test_domains_endpoints.py ├── test_filters_endpoints.py ├── test_meta_endpoints.py ├── test_msm2tag_endpoints.py ├── test_stats_endpoints.py └── test_test_endpoints.py └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | venv*/* 2 | api/venv 3 | api/ENV 4 | __pycache__ 5 | api/.idea 6 | docker-compose-dev.yml 7 | benchmark/__benchmark_data 8 | benchmark/www 9 | *.swp 10 | tests/tests_py_tag2domain/db_mock_data/db.cfg 11 | *.pyc 12 | .env 13 | msm2tag2domain/docker/msm2tag2domain.cfg 14 | password 15 | .coverage 16 | tests/config/db.cfg 17 | htmlcov/ 18 | db.config.sh 19 | -------------------------------------------------------------------------------- /contrib/iso639-1/.gitignore: -------------------------------------------------------------------------------- 1 | ietf-language-tags_csv.csv 2 | language-codes-3b2_csv.csv 3 | language-codes-full_csv.csv 4 | language-codes.csv 5 | 
#!/usr/bin/env python
r"""Convert an ISO 639 language-code CSV (read from stdin) into a
PostgreSQL ``COPY`` statement for the tag2domain ``tags`` table
(written to stdout).

Input format (CSV, header already stripped by the caller, see fetch.sh):

    alpha3-b,alpha2,English
    aar,aa,Afar
    abk,ab,Abkhazian

Output format (column order matches the printed COPY header):

    COPY public.tags (tag_id, tag_name, taxonomy_id, tag_description, extras) FROM stdin;
    32	aa	7	Afar	\N
    \.
"""

import csv
import sys

# First tag_id to assign; continues an existing id sequence in the target DB.
START_TAG_ID = 32
# taxonomy.id of the ISO 639-1 taxonomy in the target DB.
TAXONOMY_ID = 7


def format_tag_row(tag_id, taxonomy_id, row):
    """Return one tab-separated COPY data line for a CSV row.

    ``row`` is ``[alpha3-b, alpha2, English]``; the alpha2 code becomes
    the tag name and the English name becomes the description.
    """
    # Column order must match the COPY header:
    # tag_id, tag_name (alpha2), taxonomy_id, tag_description (English), extras (NULL)
    return "%d\t%s\t%s\t%s\t\\N" % (tag_id, row[1], taxonomy_id, row[2])


def main():
    print("COPY public.tags (tag_id, tag_name, taxonomy_id, tag_description, extras) FROM stdin;")
    reader = csv.reader(sys.stdin, delimiter=',')
    # enumerate assigns consecutive tag_ids starting at START_TAG_ID,
    # replacing the original hand-incremented counter that shadowed
    # the builtin ``id``.
    for tag_id, row in enumerate(reader, start=START_TAG_ID):
        print(format_tag_row(tag_id, TAXONOMY_ID, row))
    print("\\.")


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- /contrib/rrdg/DIT/fetch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | wget https://rrdg.centr.org/docs/1/data_dit_full.xlsx 4 | -------------------------------------------------------------------------------- /db/00-tag2domain-db-init/init-tag2domain-tables.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 5 | cd "$SCRIPT_DIR" 6 | 7 | function error_exit { 8 | >&2 echo "ERROR: $1" 9 | exit 1 10 | } 11 | 12 | if [ -z "$POSTGRES_USER" ]; then 13 | error_exit "POSTGRES_USER is not set" 14 | fi 15 | 16 | if [ -z "$POSTGRES_DB" ]; then 17 | error_exit "POSTGRES_DB is not set" 18 | fi 19 | 20 | if [ -z "$TAG2DOMAIN_SCHEMA" ]; then 21 | error_exit "TAG2DOMAIN_SCHEMA is not set" 22 | fi 23 | 24 | function run_psql_script { 25 | psql \ 26 | -v ON_ERROR_STOP=1 \ 27 | -v t2d_schema="$TAG2DOMAIN_SCHEMA" \ 28 | --username "$POSTGRES_USER" \ 29 | --dbname "$POSTGRES_DB" \ 30 | -f "$1" 31 | } 32 | 33 | if [ -n "$POSTGRES_HOST" ]; then 34 | export PGHOST="$POSTGRES_HOST" 35 | fi 36 | 37 | if [ -n "$POSTGRES_PORT" ]; then 38 | export PGPORT="$POSTGRES_PORT" 39 | fi 40 | 41 | if [ -n "$POSTGRES_PASSWORD_FILE" ]; then 42 | export PGPASSFILE="$POSTGRES_PASSWORD_FILE" 43 | fi 44 | 45 | for file in $(find . 
-mindepth 2 -maxdepth 2 -name "*.sql" | sort); do 46 | run_psql_script $file 47 | done 48 | -------------------------------------------------------------------------------- /db/00-tag2domain-db-init/sql/30-taxonomy.sql: -------------------------------------------------------------------------------- 1 | SET statement_timeout = 0; 2 | SET lock_timeout = 0; 3 | SET idle_in_transaction_session_timeout = 0; 4 | SET client_encoding = 'UTF8'; 5 | SET standard_conforming_strings = on; 6 | SET check_function_bodies = false; 7 | SET xmloption = content; 8 | SET client_min_messages = warning; 9 | SET row_security = off; 10 | 11 | SET default_tablespace = ''; 12 | 13 | SET default_with_oids = false; 14 | 15 | CREATE SCHEMA IF NOT EXISTS :t2d_schema; 16 | SET search_path TO :t2d_schema; 17 | 18 | CREATE TABLE taxonomy ( 19 | id integer NOT NULL, 20 | name character varying(300) NOT NULL, 21 | description text, 22 | is_actionable double precision, 23 | is_automatically_classifiable boolean, 24 | is_stable boolean, 25 | for_numbers boolean, 26 | for_domains boolean, 27 | url character varying(500), 28 | allows_auto_tags boolean DEFAULT false, 29 | allows_auto_values boolean DEFAULT false 30 | ); 31 | 32 | CREATE SEQUENCE taxonomy_id_seq 33 | AS integer 34 | START WITH 1 35 | INCREMENT BY 1 36 | NO MINVALUE 37 | NO MAXVALUE 38 | CACHE 1; 39 | 40 | ALTER SEQUENCE taxonomy_id_seq OWNED BY taxonomy.id; 41 | ALTER TABLE ONLY taxonomy ALTER COLUMN id SET DEFAULT nextval('taxonomy_id_seq'::regclass); 42 | ALTER TABLE ONLY taxonomy ADD CONSTRAINT taxonomy_name_key UNIQUE (name); 43 | ALTER TABLE ONLY taxonomy ADD CONSTRAINT taxonomy_pkey PRIMARY KEY (id); 44 | 45 | COMMENT ON COLUMN taxonomy.id IS 'Primary Key'; 46 | COMMENT ON COLUMN taxonomy.name IS 'Name of the taxonomy'; 47 | COMMENT ON COLUMN taxonomy.description IS 'Short description of the taxonomy'; 48 | COMMENT ON COLUMN taxonomy.is_actionable IS '1 is taxonomy is actionable, 0 if not'; 49 | COMMENT ON COLUMN 
taxonomy.is_automatically_classifiable IS '1 if tags can be automatically detected, 0 if not'; 50 | COMMENT ON COLUMN taxonomy.is_stable IS 'rue if taxonomy is stable'; 51 | COMMENT ON COLUMN taxonomy.for_numbers IS 'true if taxonomy can be applied to IP adresses'; 52 | COMMENT ON COLUMN taxonomy.for_domains IS 'true if taxonomy can be applied to domains'; 53 | COMMENT ON COLUMN taxonomy.url IS 'Link to a description or the source of the taxonomy'; 54 | COMMENT ON COLUMN taxonomy.allows_auto_tags IS 'true if the taxonomy allows the automatic generation of tags'; 55 | COMMENT ON COLUMN taxonomy.allows_auto_values IS 'true if the taxonomy allows the automatic generation of values'; 56 | -------------------------------------------------------------------------------- /db/00-tag2domain-db-init/sql/50-tags.sql: -------------------------------------------------------------------------------- 1 | SET statement_timeout = 0; 2 | SET lock_timeout = 0; 3 | SET idle_in_transaction_session_timeout = 0; 4 | SET client_encoding = 'UTF8'; 5 | SET standard_conforming_strings = on; 6 | SET check_function_bodies = false; 7 | SET xmloption = content; 8 | SET client_min_messages = warning; 9 | SET row_security = off; 10 | 11 | SET default_tablespace = ''; 12 | 13 | SET default_with_oids = false; 14 | 15 | CREATE SCHEMA IF NOT EXISTS :t2d_schema; 16 | SET search_path TO :t2d_schema; 17 | 18 | CREATE TABLE tags ( 19 | tag_id integer NOT NULL, 20 | tag_name character varying(200), 21 | tag_description character varying NOT NULL, 22 | taxonomy_id integer, 23 | extras jsonb 24 | ); 25 | 26 | 27 | CREATE SEQUENCE tags_tag_id_seq 28 | START WITH 1 29 | INCREMENT BY 1 30 | NO MINVALUE 31 | NO MAXVALUE 32 | CACHE 1; 33 | 34 | ALTER SEQUENCE tags_tag_id_seq OWNED BY tags.tag_id; 35 | ALTER TABLE ONLY tags ALTER COLUMN tag_id SET DEFAULT nextval('tags_tag_id_seq'::regclass); 36 | ALTER TABLE ONLY tags ADD CONSTRAINT pk_tags_tag_id PRIMARY KEY (tag_id); 37 | ALTER TABLE ONLY tags ADD CONSTRAINT 
tags_tag_name_taxonomy_id_key UNIQUE (tag_name, taxonomy_id); 38 | ALTER TABLE ONLY tags ADD CONSTRAINT taxonomy_predicates_taxonomy_id_fkey FOREIGN KEY (taxonomy_id) REFERENCES taxonomy(id); 39 | 40 | COMMENT ON COLUMN tags.tag_id IS 'Primary Key'; 41 | COMMENT ON COLUMN tags.tag_name IS 'Tag Name'; 42 | COMMENT ON COLUMN tags.tag_description IS 'Short description of the tag'; 43 | COMMENT ON COLUMN tags.taxonomy_id IS 'Foreign key in "taxonomy"'; 44 | COMMENT ON COLUMN tags.extras IS 'Additional descriptions of the tag'; 45 | -------------------------------------------------------------------------------- /db/00-tag2domain-db-init/sql/70-taxonomy_tag_val.sql: -------------------------------------------------------------------------------- 1 | SET statement_timeout = 0; 2 | SET lock_timeout = 0; 3 | SET idle_in_transaction_session_timeout = 0; 4 | SET client_encoding = 'UTF8'; 5 | SET standard_conforming_strings = on; 6 | SET check_function_bodies = false; 7 | SET xmloption = content; 8 | SET client_min_messages = warning; 9 | SET row_security = off; 10 | 11 | SET default_tablespace = ''; 12 | SET default_with_oids = false; 13 | 14 | CREATE SCHEMA IF NOT EXISTS :t2d_schema; 15 | SET search_path TO :t2d_schema; 16 | 17 | CREATE TABLE taxonomy_tag_val ( 18 | id integer NOT NULL, 19 | value text, 20 | tag_id integer NOT NULL 21 | ); 22 | 23 | CREATE SEQUENCE taxonomy_tag_val_id_seq 24 | AS integer 25 | START WITH 1 26 | INCREMENT BY 1 27 | NO MINVALUE 28 | NO MAXVALUE 29 | CACHE 1; 30 | 31 | ALTER SEQUENCE taxonomy_tag_val_id_seq OWNED BY taxonomy_tag_val.id; 32 | ALTER TABLE ONLY taxonomy_tag_val ALTER COLUMN id SET DEFAULT nextval('taxonomy_tag_val_id_seq'::regclass); 33 | ALTER TABLE ONLY taxonomy_tag_val ADD CONSTRAINT taxonomy_tag_val_pkey PRIMARY KEY (id); 34 | ALTER TABLE ONLY taxonomy_tag_val ADD CONSTRAINT taxonomy_tag_val_tag_id_fkey FOREIGN KEY (tag_id) REFERENCES tags(tag_id); 35 | 36 | COMMENT ON TABLE taxonomy_tag_val IS 'Table of tag2domain-tag 
#!/bin/bash
# Master DB init script: runs every *.sh script found in each immediate
# subdirectory of this directory. Subdirectories are processed in sorted
# order so numbered prefixes (00-, 50-, ...) control the sequence.
set -e

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
cd "$SCRIPT_DIR"

for dir in $(find "$(pwd)" -mindepth 1 -maxdepth 1 -type d | sort); do
    echo "Running scripts in directory $dir"
    # Sort here too so scripts inside a directory run in a deterministic
    # order instead of raw find order.
    for script in $(find "$dir" -name "*.sh" -type f | sort); do
        echo "-- running script $script"
        bash "$script"
    done
done
./docker/all-in-one-demo/db:/docker-entrypoint-initdb.d/50-all-in-one-demo:ro 26 | networks: 27 | - backend 28 | 29 | api: 30 | build: 31 | context: . 32 | dockerfile: tag2domain_api/Dockerfile.msm2tag 33 | restart: unless-stopped 34 | environment: 35 | LOG_LEVEL: INFO 36 | MODULE_NAME: tag2domain_api.app.main 37 | PORT: ${TAG2DOMAIN_API_PORT} 38 | DBHOST: db 39 | DBUSER: ${POSTGRES_USER} 40 | DBPASSWORD: ${POSTGRES_PASSWORD} 41 | DBSSLMODE: ${POSTGRES_SSLMODE} 42 | DBTAG2DOMAIN_SCHEMA: ${TAG2DOMAIN_SCHEMA} 43 | DB: ${POSTGRES_DB} 44 | TIMEOUT: 300 45 | GRACEFUL_TIMEOUT: 300 46 | ENABLE_MSM2TAG: "False" 47 | MAX_WORKERS: 1 48 | ports: 49 | - "${TAG2DOMAIN_API_PORT}:${TAG2DOMAIN_API_PORT}" 50 | depends_on: 51 | - db 52 | networks: 53 | - frontend 54 | - backend 55 | volumes: 56 | - ./docker/all-in-one-demo/tag2domain_intxn_tables.cfg:/app/intxn_tables.cfg 57 | 58 | msm2tag2domain: 59 | build: 60 | context: . 61 | dockerfile: msm2tag2domain/docker/Dockerfile 62 | restart: unless-stopped 63 | depends_on: 64 | - db 65 | networks: 66 | - backend 67 | volumes: 68 | - ./msm2tag2domain/docker/msm2tag2domain.cfg:/usr/src/app/msm2tag2domain.cfg 69 | 70 | 71 | networks: 72 | backend: 73 | 74 | frontend: -------------------------------------------------------------------------------- /docker-compose.all-in-one.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | # This compose configuration is set up to run from the repository root folder 4 | 5 | services: 6 | db: 7 | image: postgres:11.9 8 | restart: unless-stopped 9 | environment: 10 | POSTGRES_USER: ${POSTGRES_USER} 11 | POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} 12 | POSTGRES_DB: ${POSTGRES_DB} 13 | TAG2DOMAIN_SCHEMA: ${TAG2DOMAIN_SCHEMA} 14 | TAG2DOMAIN_ENTITY_TABLE: ${TAG2DOMAIN_ENTITY_TABLE} 15 | TAG2DOMAIN_ENTITY_ID_COLUMN: ${TAG2DOMAIN_ENTITY_ID_COLUMN} 16 | TAG2DOMAIN_ENTITY_NAME_COLUMN: ${TAG2DOMAIN_ENTITY_NAME_COLUMN} 17 | # uncomment to make DB accessible from 
outside the docker network on port 5432 18 | #ports: 19 | # - "5432:5432" 20 | volumes: 21 | # Basic tag2domain DB configuration scripts 22 | - ./db/db_master_script.sh:/docker-entrypoint-initdb.d/00-db_master_script.sh:ro 23 | - ./db/00-tag2domain-db-init:/docker-entrypoint-initdb.d/00-tag2domain-db-init:ro 24 | # all-in-one-demo: mock registry + glue tables to work with tag2domain + mock data 25 | - ./docker/all-in-one-demo/db:/docker-entrypoint-initdb.d/50-all-in-one-demo:ro 26 | networks: 27 | - backend 28 | 29 | api: 30 | build: 31 | context: . 32 | dockerfile: ./tag2domain_api/Dockerfile.msm2tag 33 | restart: unless-stopped 34 | environment: 35 | LOG_LEVEL: DEBUG 36 | MODULE_NAME: tag2domain_api.app.main 37 | PORT: ${TAG2DOMAIN_API_PORT} 38 | DBHOST: db 39 | DBUSER: ${POSTGRES_USER} 40 | DBPASSWORD: ${POSTGRES_PASSWORD} 41 | DBSSLMODE: ${POSTGRES_SSLMODE} 42 | DBTAG2DOMAIN_SCHEMA: ${TAG2DOMAIN_SCHEMA} 43 | DB: ${POSTGRES_DB} 44 | TIMEOUT: 300 45 | GRACEFUL_TIMEOUT: 300 46 | ENABLE_MSM2TAG: "True" 47 | MSM2TAG_DB_CONFIG: /app/intxn_tables.cfg 48 | MAX_WORKERS: 1 49 | ports: 50 | - "${TAG2DOMAIN_API_PORT}:${TAG2DOMAIN_API_PORT}" 51 | depends_on: 52 | - db 53 | networks: 54 | - frontend 55 | - backend 56 | volumes: 57 | - ./docker/all-in-one-demo/tag2domain_intxn_tables.cfg:/app/intxn_tables.cfg 58 | 59 | networks: 60 | backend: 61 | 62 | frontend: 63 | -------------------------------------------------------------------------------- /docker-compose.external-db.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | # This compose configuration is set up to run from the repository root folder 4 | 5 | services: 6 | api: 7 | build: 8 | context: . 
9 | dockerfile: ./tag2domain_api/Dockerfile.msm2tag 10 | restart: unless-stopped 11 | environment: 12 | LOG_LEVEL: DEBUG 13 | MODULE_NAME: tag2domain_api.app.main 14 | PORT: ${TAG2DOMAIN_API_PORT} 15 | DBHOST: db 16 | DBUSER: ${POSTGRES_USER} 17 | DBPASSWORD: ${POSTGRES_PASSWORD} 18 | DBSSLMODE: ${POSTGRES_SSLMODE} 19 | DBTAG2DOMAIN_SCHEMA: ${TAG2DOMAIN_SCHEMA} 20 | DB: ${POSTGRES_DB} 21 | TIMEOUT: 300 22 | GRACEFUL_TIMEOUT: 300 23 | # set ENABLE_MSM2TAG to "True" if you want to be able to post measurements 24 | # using a REST call to the tag2domain API 25 | ENABLE_MSM2TAG: "True" 26 | MSM2TAG_DB_CONFIG: /app/intxn_tables.cfg 27 | MAX_WORKERS: 1 28 | ports: 29 | - "${TAG2DOMAIN_API_PORT}:${TAG2DOMAIN_API_PORT}" 30 | depends_on: 31 | - db 32 | networks: 33 | - frontend 34 | - backend 35 | volumes: 36 | # use tag2domain.cfg as intersection table configuration - only the 37 | # intersection tables are used. The rest of the configuration is done 38 | # using the environment variables above. 39 | - ./docker/external-db/tag2domain.cfg:/app/intxn_tables.cfg 40 | 41 | msm2tag2domain: 42 | build: 43 | context: . 
44 | dockerfile: msm2tag2domain/docker/Dockerfile 45 | restart: unless-stopped 46 | depends_on: 47 | - db 48 | networks: 49 | - backend 50 | volumes: 51 | - ./docker/external-db/tag2domain.cfg:/usr/src/app/msm2tag2domain.cfg 52 | 53 | networks: 54 | backend: 55 | 56 | frontend: 57 | -------------------------------------------------------------------------------- /docker-compose.test-db.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | services: 4 | db: 5 | image: postgres:11.9 6 | restart: unless-stopped 7 | environment: 8 | POSTGRES_USER: ${POSTGRES_USER} 9 | POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} 10 | POSTGRES_DB: tag2domain_mock_db 11 | TAG2DOMAIN_SCHEMA: ${TAG2DOMAIN_SCHEMA} 12 | ports: 13 | - "5432:5432" 14 | volumes: 15 | # Basic tag2domain DB configuration scripts 16 | - ./db/db_master_script.sh:/docker-entrypoint-initdb.d/00-db_master_script.sh:ro 17 | - ./db/00-tag2domain-db-init:/docker-entrypoint-initdb.d/00-tag2domain-db-init:ro 18 | # all-in-one-demo: mock registry + glue tables to work with tag2domain + mock data 19 | - ./tests/db_mock_data:/docker-entrypoint-initdb.d/50-test-db:ro -------------------------------------------------------------------------------- /docker/all-in-one-demo/db/50-init-tag2domain-db.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 5 | cd "$SCRIPT_DIR" 6 | 7 | function run_psql_script { 8 | echo "RUNNING PSQL SCRIPT $(pwd)/$1" 9 | psql \ 10 | -v ON_ERROR_STOP=1 \ 11 | -v t2d_schema="$TAG2DOMAIN_SCHEMA" \ 12 | -v t2d_entity_table="$TAG2DOMAIN_ENTITY_TABLE" \ 13 | -v t2d_entity_id_column="$TAG2DOMAIN_ENTITY_ID_COLUMN" \ 14 | --username "$POSTGRES_USER" \ 15 | --dbname "$POSTGRES_DB" \ 16 | -f "$1" 17 | } 18 | 19 | run_psql_script all-in-one-demo-sql/mock_registry_schema.sql 20 | run_psql_script 
all-in-one-demo-sql/mock_registry_data.sql 21 | run_psql_script all-in-one-demo-sql/intersections.sql 22 | run_psql_script all-in-one-demo-sql/tag2domain_glue_tables.sql 23 | run_psql_script all-in-one-demo-sql/data.sql -------------------------------------------------------------------------------- /docker/all-in-one-demo/db/all-in-one-demo-sql/data.sql: -------------------------------------------------------------------------------- 1 | SET search_path TO :t2d_schema; 2 | 3 | -- 4 | -- taxonomies 5 | -- 6 | INSERT INTO taxonomy 7 | (name, description, is_actionable, is_automatically_classifiable, is_stable, for_numbers, for_domains, url, allows_auto_tags, allows_auto_values) 8 | VALUES 9 | ('colors', 'Test taxonomy 1 - colors', 1, true, false, true, true, 'https://test1.com', false, false) 10 | ; 11 | 12 | INSERT INTO taxonomy 13 | (name, description, is_actionable, is_automatically_classifiable, is_stable, for_numbers, for_domains, url, allows_auto_tags, allows_auto_values) 14 | VALUES 15 | ('flavors', 'Test taxonomy 2 - flavors', 0, true, false, true, true, 'https://test2.com', true, true) 16 | ; 17 | 18 | -- 19 | -- tags and values 20 | -- 21 | INSERT INTO tags (tag_name, tag_description, taxonomy_id) VALUES ('rgb::red','Red', 1); -- 1 22 | INSERT INTO tags (tag_name, tag_description, taxonomy_id) VALUES ('rgb::green','Green', 1); -- 2 23 | INSERT INTO tags (tag_name, tag_description, taxonomy_id) VALUES ('rgb::blue','Blue', 1); -- 3 24 | INSERT INTO tags (tag_name, tag_description, taxonomy_id) VALUES ('cmyk::cyan','Cyan', 1); -- 4 25 | INSERT INTO tags (tag_name, tag_description, taxonomy_id) VALUES ('cmyk::magenta','Magenta', 1); -- 5 26 | INSERT INTO tags (tag_name, tag_description, taxonomy_id) VALUES ('cmyk::yellow','Yellow', 1); -- 6 27 | INSERT INTO tags (tag_name, tag_description, taxonomy_id) VALUES ('cmyk::black','Black', 1); -- 7 28 | 29 | INSERT INTO tags (tag_name, tag_description, taxonomy_id) VALUES ('sweet','Sweet', 2); -- 8 30 | INSERT INTO 
tags (tag_name, tag_description, taxonomy_id) VALUES ('salty','Salty', 2); -- 9 31 | INSERT INTO tags (tag_name, tag_description, taxonomy_id) VALUES ('sour','Sour', 2); -- 10 32 | INSERT INTO tags (tag_name, tag_description, taxonomy_id) VALUES ('bitter','Bitter', 2); -- 11 33 | INSERT INTO tags (tag_name, tag_description, taxonomy_id) VALUES ('umami','Umami', 2); -- 12 34 | 35 | INSERT INTO taxonomy_tag_val (value, tag_id) VALUES ('very', 8); -- 1 36 | INSERT INTO taxonomy_tag_val (value, tag_id) VALUES ('a little', 8); -- 2 37 | 38 | INSERT INTO taxonomy_tag_val (value, tag_id) VALUES ('lightly', 9); -- 3 39 | INSERT INTO taxonomy_tag_val (value, tag_id) VALUES ('strongly', 9); -- 4 40 | 41 | INSERT INTO taxonomy_tag_val (value, tag_id) VALUES ('lemony', 10); -- 5 42 | INSERT INTO taxonomy_tag_val (value, tag_id) VALUES ('a touch', 10); -- 6 43 | 44 | -- 45 | -- intersections 46 | -- 47 | 48 | INSERT INTO intersections (entity_id, tag_id, start_date, end_date, taxonomy_id, value_id, start_ts, measured_at, end_ts, producer) 49 | VALUES (1, 1, 20201020, NULL , 1, NULL,'2020-10-20T09:00:00', '2020-11-23T12:00:00', NULL , 'init1'); 50 | 51 | INSERT INTO intersections (entity_id, tag_id, start_date, end_date, taxonomy_id, value_id, start_ts, measured_at, end_ts, producer) 52 | VALUES (1, 4, 20201020, 20201020, 1, NULL,'2020-10-10T23:51:00', '2020-10-19T12:00:00', '2020-10-20T09:00:00', 'init1'); 53 | 54 | INSERT INTO intersections (entity_id, tag_id, start_date, end_date, taxonomy_id, value_id, start_ts, measured_at, end_ts, producer) 55 | VALUES (1, 9, 20201020, NULL , 1, 4,'2020-10-12T23:51:00', '2020-10-28T06:40:00', NULL , 'init2'); 56 | 57 | INSERT INTO intersections (entity_id, tag_id, start_date, end_date, taxonomy_id, value_id, start_ts, measured_at, end_ts, producer) 58 | VALUES (1, 9, 20201020, 20201028, 1, 3,'2020-10-14T23:51:00', '2020-10-28T06:40:00', '2020-10-28T06:40:00', 'init2'); 
-------------------------------------------------------------------------------- /docker/all-in-one-demo/db/all-in-one-demo-sql/intersections.sql: -------------------------------------------------------------------------------- 1 | SET statement_timeout = 0; 2 | SET lock_timeout = 0; 3 | SET idle_in_transaction_session_timeout = 0; 4 | SET client_encoding = 'UTF8'; 5 | SET standard_conforming_strings = on; 6 | SET check_function_bodies = false; 7 | SET xmloption = content; 8 | SET client_min_messages = warning; 9 | SET row_security = off; 10 | 11 | SET default_tablespace = ''; 12 | SET default_with_oids = false; 13 | 14 | CREATE SCHEMA IF NOT EXISTS :t2d_schema; 15 | SET search_path TO :t2d_schema; 16 | 17 | CREATE TABLE intersections ( 18 | entity_id bigint, 19 | tag_id integer, 20 | start_date integer, 21 | end_date integer, 22 | taxonomy_id integer, 23 | value_id integer, 24 | measured_at timestamp with time zone, 25 | start_ts timestamp with time zone, 26 | end_ts timestamp with time zone, 27 | producer character varying(100) DEFAULT NULL::character varying 28 | ); 29 | 30 | COMMENT ON TABLE intersections IS 'Intersection table that marks which tags and values where set on an entity in a given timespan'; 31 | COMMENT ON COLUMN intersections.entity_id IS 'Foreign key in the table of entities'; 32 | COMMENT ON COLUMN intersections.tag_id IS 'Foreign Key in table "tags"'; 33 | COMMENT ON COLUMN intersections.start_date IS 'Startdate as int (YYYYMMDD)'; 34 | COMMENT ON COLUMN intersections.end_date IS 'Enddate as int (YYYYMMDD)'; 35 | COMMENT ON COLUMN intersections.taxonomy_id IS 'Foreign Key in table "taxonomy"'; 36 | COMMENT ON COLUMN intersections.value_id IS 'Foreign Key in table "taxonomy_tag_val"'; 37 | COMMENT ON COLUMN intersections.measured_at IS 'Last time the tag was measured'; 38 | COMMENT ON COLUMN intersections.start_ts IS 'Start date/time of tag'; 39 | COMMENT ON COLUMN intersections.end_ts IS 'End date/time of tag'; 40 | COMMENT ON COLUMN 
intersections.producer IS 'Name of producer that measured the tag'; 41 | 42 | CREATE INDEX idx_intersections_entity_id ON intersections USING btree (entity_id); 43 | CREATE INDEX idx_intersections_end_ts ON intersections USING btree (end_ts DESC); 44 | CREATE INDEX idx_intersections_start_ts ON intersections USING btree (start_ts DESC); 45 | CREATE INDEX idx_intersections_tag_id ON intersections USING btree (tag_id); 46 | 47 | ALTER TABLE ONLY intersections ADD CONSTRAINT intersections_fk FOREIGN KEY (value_id) REFERENCES taxonomy_tag_val(id); 48 | ALTER TABLE ONLY intersections ADD CONSTRAINT intersections_fk_1 FOREIGN KEY (taxonomy_id) REFERENCES taxonomy(id); 49 | ALTER TABLE ONLY intersections ADD CONSTRAINT fk_intersections_entitys FOREIGN KEY (entity_id) REFERENCES :t2d_entity_table(:t2d_entity_id_column); 50 | ALTER TABLE ONLY intersections ADD CONSTRAINT fk_intersections_tags FOREIGN KEY (tag_id) REFERENCES tags(tag_id); 51 | -------------------------------------------------------------------------------- /docker/all-in-one-demo/db/all-in-one-demo-sql/mock_registry_data.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO public.domains (domain_name) VALUES ('domain_test1.at'); 2 | INSERT INTO public.domains (domain_name) VALUES ('domain_test2.at'); 3 | INSERT INTO public.domains (domain_name) VALUES ('domain_test3.at'); 4 | INSERT INTO public.domains (domain_name) VALUES ('domain_test4.at'); 5 | INSERT INTO public.domains (domain_name) VALUES ('domain_test5.at'); 6 | 7 | INSERT INTO public.registrars (registrar_name) VALUES ('registrar_1'); 8 | INSERT INTO public.registrars (registrar_name) VALUES ('registrar_2'); 9 | INSERT INTO public.registrars (registrar_name) VALUES ('registrar_3'); 10 | 11 | INSERT INTO public.delegations (registrar_id, domain_id, start_ts, end_ts) VALUES (1, 1, '2020-10-10T12:00:00', NULL); 12 | INSERT INTO public.delegations (registrar_id, domain_id, start_ts, end_ts) VALUES (1, 1, 
'2016-03-01T08:31:00', '2018-02-01T20:00:00'); 13 | 14 | INSERT INTO public.delegations (registrar_id, domain_id, start_ts, end_ts) VALUES (1, 2, '2020-06-10T12:00:00', NULL); 15 | INSERT INTO public.delegations (registrar_id, domain_id, start_ts, end_ts) VALUES (1, 3, '2010-03-01T09:00:00', '2019-02-20T14:00:00'); 16 | 17 | INSERT INTO public.delegations (registrar_id, domain_id, start_ts, end_ts) VALUES (2, 2, '2017-02-28T12:00:00', '2020-01-02T06:00:00'); -------------------------------------------------------------------------------- /docker/all-in-one-demo/db/all-in-one-demo-sql/mock_registry_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE SEQUENCE domain_seq; 2 | 3 | CREATE TABLE public.domains 4 | ( 5 | domain_id bigint NOT NULL DEFAULT nextval('domain_seq'::regclass), 6 | domain_name character varying(100) COLLATE pg_catalog."default", 7 | CONSTRAINT pk_domains PRIMARY KEY (domain_id), 8 | CONSTRAINT idx_domains_domain_name UNIQUE (domain_name) 9 | ) 10 | WITH ( 11 | OIDS = FALSE 12 | ) 13 | TABLESPACE pg_default; 14 | 15 | CREATE SEQUENCE registrar_seq; 16 | CREATE TABLE public.registrars 17 | ( 18 | registrar_id bigint NOT NULL DEFAULT nextval('registrar_seq'::regclass), 19 | registrar_name character varying(100) COLLATE pg_catalog."default", 20 | CONSTRAINT pk_registrars PRIMARY KEY (registrar_id), 21 | CONSTRAINT idx_registrars_registrar_name UNIQUE (registrar_name) 22 | ) 23 | WITH ( 24 | OIDS = FALSE 25 | ) 26 | TABLESPACE pg_default; 27 | 28 | CREATE SEQUENCE delegation_seq; 29 | CREATE TABLE public.delegations 30 | ( 31 | delegation_id bigint NOT NULL DEFAULT nextval('delegation_seq'::regclass), 32 | registrar_id bigint NOT NULL, 33 | domain_id bigint NOT NULL, 34 | start_ts timestamp with time zone NOT NULL, 35 | end_ts timestamp with time zone, 36 | CONSTRAINT pk_delegations PRIMARY KEY (delegation_id) 37 | ) 38 | WITH ( 39 | OIDS = FALSE 40 | ) 41 | TABLESPACE pg_default; 42 | 43 | ALTER 
TABLE ONLY public.delegations ADD CONSTRAINT fk_delegations_registrar FOREIGN KEY (registrar_id) REFERENCES registrars(registrar_id); 44 | ALTER TABLE ONLY public.delegations ADD CONSTRAINT fk_delegations_domain FOREIGN KEY (domain_id) REFERENCES domains(domain_id); -------------------------------------------------------------------------------- /docker/all-in-one-demo/db/all-in-one-demo-sql/tag2domain_glue_tables.sql: -------------------------------------------------------------------------------- 1 | CREATE SCHEMA IF NOT EXISTS :t2d_schema; 2 | SET search_path TO :t2d_schema; 3 | 4 | CREATE OR REPLACE VIEW v_unified_tags 5 | AS SELECT 6 | domains.domain_id, 7 | domains.domain_name, 8 | 'domain' AS tag_type, 9 | intersections.tag_id, 10 | intersections.value_id, 11 | intersections.start_ts, 12 | intersections.measured_at, 13 | intersections.end_ts 14 | FROM public.domains AS domains 15 | JOIN intersections ON (domains.domain_id = intersections.entity_id); 16 | 17 | CREATE OR REPLACE VIEW v_tag2domain_domain_filter 18 | AS 19 | -- registrars 20 | SELECT 21 | domains.domain_id AS domain_id, 22 | 'registrar-id' AS tag_name, 23 | delegations.start_ts AS start_ts, 24 | delegations.end_ts AS end_ts, 25 | registrars.registrar_id::text AS value 26 | FROM public.registrars AS registrars 27 | JOIN public.delegations AS delegations USING(registrar_id) 28 | JOIN public.domains AS domains USING(domain_id) 29 | ; 30 | 31 | DROP FUNCTION IF EXISTS tag2domain_get_open_tags; 32 | -- function tag2domain_get_open_tags() 33 | -- returns a table with all currently open tags 34 | CREATE FUNCTION tag2domain_get_open_tags() 35 | RETURNS TABLE( 36 | domain_id bigint, 37 | domain_name character varying(100), 38 | tag_type text, 39 | tag_id int, 40 | value_id int, 41 | start_time timestamp with time zone, 42 | measured_at timestamp with time zone, 43 | end_time timestamp with time zone 44 | ) AS $$ 45 | SELECT * FROM v_unified_tags 46 | WHERE (v_unified_tags.end_ts IS NULL) 47 | $$ LANGUAGE 
SQL STABLE 48 | SET search_path TO :t2d_schema 49 | ; 50 | 51 | DROP FUNCTION IF EXISTS tag2domain_get_open_tags_filtered; 52 | -- function tag2domain_get_open_tags_filtered(filter_type, filter_value) 53 | -- 54 | -- returns a table with all currently open tags for all domains that filtered 55 | -- through the v_tag2domain_domain_filter filter tables. 56 | -- 57 | -- filter_type references the tag_name column and filter_value the value column of the 58 | -- v_tag2domain_domain_filter table. A domain passes if a row with 59 | -- tag_name=filter_type AND value=filter_value 60 | -- exists. 61 | CREATE FUNCTION tag2domain_get_open_tags_filtered(filter_type text, filter_value text) 62 | RETURNS TABLE( 63 | domain_id bigint, 64 | domain_name character varying(100), 65 | tag_type text, 66 | tag_id int, 67 | value_id int, 68 | start_time timestamp with time zone, 69 | measured_at timestamp with time zone, 70 | end_time timestamp with time zone 71 | ) AS $$ 72 | SELECT v_unified_tags.* FROM v_unified_tags 73 | JOIN v_tag2domain_domain_filter USING (domain_id) 74 | WHERE ( 75 | (v_unified_tags.end_ts IS NULL) 76 | AND (v_tag2domain_domain_filter.end_ts IS NULL) 77 | AND (v_tag2domain_domain_filter.tag_name = $1) 78 | AND (v_tag2domain_domain_filter.value = $2) 79 | ) 80 | $$ LANGUAGE SQL STABLE 81 | SET search_path TO :t2d_schema 82 | ; 83 | 84 | DROP FUNCTION IF EXISTS tag2domain_get_tags_at_time; 85 | -- function tag2domain_get_tags_at_time(at_time) 86 | -- Returns all tags that were open at time at_time. 
-- function tag2domain_get_tags_at_time(at_time)
--
-- Returns one row per tag whose validity interval covered at_time, i.e.
-- start_ts <= at_time AND (end_ts > at_time OR end_ts IS NULL, meaning the
-- tag is still open). Output columns mirror v_unified_tags.
--
-- NOTE(review): at_time is a plain "timestamp" compared against
-- "timestamp with time zone" columns, so the session time zone affects the
-- comparison here (unlike the *_filtered variant, which applies
-- AT TIME ZONE 'UTC' to the filter table) -- confirm callers pass UTC.
CREATE FUNCTION tag2domain_get_tags_at_time(at_time timestamp)
RETURNS TABLE(
    domain_id bigint,
    domain_name character varying(100),
    tag_type text,
    tag_id int,
    value_id int,
    start_time timestamp with time zone,
    measured_at timestamp with time zone,
    end_time timestamp with time zone
) AS $$
SELECT v_unified_tags.* FROM v_unified_tags
  WHERE (
    (v_unified_tags.start_ts <= $1)
    AND ((v_unified_tags.end_ts > $1) OR (v_unified_tags.end_ts IS NULL))
  )
$$ LANGUAGE SQL STABLE
SET search_path TO :t2d_schema
;
117 | CREATE FUNCTION tag2domain_get_tags_at_time_filtered(at_time timestamp, filter_type text, filter_value text) 118 | RETURNS TABLE( 119 | domain_id bigint, 120 | domain_name character varying(100), 121 | tag_type text, 122 | tag_id int, 123 | value_id int, 124 | start_time timestamp with time zone, 125 | measured_at timestamp with time zone, 126 | end_time timestamp with time zone 127 | ) AS $$ 128 | SELECT v_unified_tags.* FROM v_unified_tags 129 | JOIN v_tag2domain_domain_filter USING (domain_id) 130 | WHERE ( 131 | (v_unified_tags.start_ts <= $1) 132 | AND ((v_unified_tags.end_ts > $1) OR (v_unified_tags.end_ts IS NULL)) 133 | AND (v_tag2domain_domain_filter.start_ts AT TIME ZONE 'UTC' <= $1) 134 | AND ((v_tag2domain_domain_filter.end_ts AT TIME ZONE 'UTC' > $1) OR (v_tag2domain_domain_filter.end_ts IS NULL)) 135 | AND (v_tag2domain_domain_filter.tag_name = $2) 136 | AND (v_tag2domain_domain_filter.value = $3) 137 | ) 138 | $$ LANGUAGE SQL STABLE 139 | SET search_path TO :t2d_schema 140 | ; 141 | 142 | DROP FUNCTION IF EXISTS tag2domain_get_open_tags_domain; 143 | -- function tag2domain_get_open_tags_domain(domain_name) 144 | -- 145 | -- returns a table with the open tags for a single domain with name domain_name 146 | CREATE FUNCTION tag2domain_get_open_tags_domain(domain_name character varying (100)) 147 | RETURNS TABLE( 148 | domain_id bigint, 149 | domain_name character varying(100), 150 | tag_type text, 151 | tag_id int, 152 | value_id int, 153 | start_time timestamp with time zone, 154 | measured_at timestamp with time zone, 155 | end_time timestamp with time zone 156 | ) AS $$ 157 | SELECT v_unified_tags.* FROM v_unified_tags 158 | WHERE 159 | (v_unified_tags.end_ts IS NULL) 160 | AND (v_unified_tags.domain_name = $1) 161 | $$ LANGUAGE SQL STABLE 162 | SET search_path TO :t2d_schema 163 | ; 164 | 165 | DROP FUNCTION IF EXISTS tag2domain_get_tags_at_time_domain; 166 | -- function tag2domain_get_open_tags_domain(at_time, domain_name) 167 | -- 168 | -- 
returns a table with the tags set at time at_time for a single domain with name domain_name 169 | CREATE FUNCTION tag2domain_get_tags_at_time_domain(at_time timestamp, domain_name character varying (100)) 170 | RETURNS TABLE( 171 | domain_id bigint, 172 | domain_name character varying(100), 173 | tag_type text, 174 | tag_id int, 175 | value_id int, 176 | start_time timestamp with time zone, 177 | measured_at timestamp with time zone, 178 | end_time timestamp with time zone 179 | ) AS $$ 180 | SELECT * FROM v_unified_tags 181 | WHERE ( 182 | (v_unified_tags.start_ts <= $1) 183 | AND ((v_unified_tags.end_ts > $1) OR (v_unified_tags.end_ts IS NULL)) 184 | AND (v_unified_tags.domain_name = $2) 185 | ) 186 | $$ LANGUAGE SQL STABLE 187 | SET search_path TO :t2d_schema 188 | ; 189 | 190 | DROP FUNCTION IF EXISTS tag2domain_get_all_tags_domain; 191 | -- function tag2domain_get_all_tags_domain(domain_name) 192 | -- 193 | -- returns a table with all tags that were ever set for a single domain with name domain_name 194 | CREATE FUNCTION tag2domain_get_all_tags_domain(domain_name character varying (100)) 195 | RETURNS TABLE( 196 | domain_id bigint, 197 | domain_name character varying(100), 198 | tag_type text, 199 | tag_id int, 200 | value_id int, 201 | start_time timestamp with time zone, 202 | measured_at timestamp with time zone, 203 | end_time timestamp with time zone 204 | ) AS $$ 205 | SELECT v_unified_tags.* FROM v_unified_tags 206 | WHERE ( 207 | (v_unified_tags.domain_name = $1) 208 | ) 209 | $$ LANGUAGE SQL 210 | SET search_path TO :t2d_schema 211 | ; -------------------------------------------------------------------------------- /docker/all-in-one-demo/example.env: -------------------------------------------------------------------------------- 1 | # postgres options used to setup a new database and connect to it 2 | ## username for database access - SET THIS! 3 | POSTGRES_USER= 4 | ## password used to access the database - SET THIS! 
5 | POSTGRES_PASSWORD= 6 | POSTGRES_DB=tag2domain_mock_db 7 | POSTGRES_SSLMODE=prefer 8 | 9 | # schema tag2domain tables live in 10 | TAG2DOMAIN_SCHEMA=tag2domain 11 | 12 | # options for db/all-in-one-demo/intersections.sql script 13 | # This script creates an intersection table between the tag2domain tables and 14 | # the domain table 15 | TAG2DOMAIN_ENTITY_TABLE=public.domains 16 | TAG2DOMAIN_ENTITY_ID_COLUMN=domain_id 17 | TAG2DOMAIN_ENTITY_NAME_COLUMN=domain_name 18 | 19 | # tag2domain-api options 20 | TAG2DOMAIN_API_PORT=8001 21 | -------------------------------------------------------------------------------- /docker/all-in-one-demo/tag2domain_intxn_tables.cfg: -------------------------------------------------------------------------------- 1 | [db.intxn_table.domain] 2 | table_name=intersections 3 | id=entity_id 4 | taxonomy_id=taxonomy_id 5 | tag_id=tag_id 6 | value_id=value_id 7 | measured_at=measured_at 8 | producer=producer 9 | start_date=start_date 10 | end_date=end_date 11 | start_ts=start_ts 12 | end_ts=end_ts -------------------------------------------------------------------------------- /docker/external-db/tag2domain.cfg.example: -------------------------------------------------------------------------------- 1 | [db] 2 | host= 3 | user= 4 | password= 5 | dbname=tag2domain_mock_db 6 | sslmode=prefer 7 | schema=tag2domain 8 | 9 | [logging] 10 | level=INFO 11 | 12 | [tag2domain] 13 | max_measurement_age=360 14 | 15 | [kafka] 16 | topic_name=test.msms_in 17 | # 18 | group_id= 19 | bootstrap_servers= 20 | client_id=kafka-python-msm2tag2domain 21 | 22 | # Insert intersection tables below 23 | 24 | #[db.intxn_table.domain] 25 | #table_name=intersections 26 | #id=entity_id 27 | #taxonomy_id=taxonomy_id 28 | #tag_id=tag_id 29 | #value_id=value_id 30 | #measured_at=measured_at 31 | #producer=producer 32 | #start_date=start_date 33 | #end_date=end_date 34 | #start_ts=start_ts 35 | #end_ts=end_ts 
-------------------------------------------------------------------------------- /docs/advanced_db_config.md: -------------------------------------------------------------------------------- 1 | # Advanced DB configuration 2 | 3 | ## Glue tables 4 | tag2domain-api uses SQL function in the glue section of the tag2domain schema 5 | to gather the tags that are active at any time. This allows one to combine 6 | information from multiple tables and provide them through the tag2domain API. 7 | 8 | The functions required are: 9 | + _tag2domain_get_open_tags_() - provides a table with all tags that are open, 10 | i.e. whose end_time is NULL 11 | + _tag2domain_get_tags_at_time_(at_time) - provides a table with all tags that 12 | were active at time _at_time_ 13 | + _tag2domain_get_open_tags_domain_(domain_name)_ - provides a table with all 14 | tags that are open (end_time is NULL) and that are associated with domain name 15 | _domain_name_ 16 | + _tag2domain_get_tags_at_time_domain_(at_time, domain_name) - provides a table 17 | with all tags that are associated with _domain_name_ and that were open at time 18 | _at_time_ 19 | + _tag2domain_get_all_tags_domain(domain_name)_ - get all tags that were ever 20 | associated with the domain name _domain_name_ 21 | 22 | In addition there are also filtered versions of the first two functions: 23 | + _tag2domain_get_open_tags_filtered(filter_type, filter_value)_ 24 | + _tag2domain_get_tags_at_time_filtered(at_time, filter_type, filter_value)_ 25 | 26 | These functions only return domains that have a certain property. See the 27 | [Filters](#filters) section for more details on how filters can be implemented. 28 | 29 | As an illustration consider the tables that are set up for the all-in-one demo 30 | setup (`docker/all-in-one-demo/db/all-in-one-demo-sql/tag2domain_glue_tables.sql`). 
31 | First a view is defined: 32 | ``` sql 33 | CREATE OR REPLACE VIEW v_unified_tags 34 | AS SELECT 35 | domains.domain_id, 36 | domains.domain_name, 37 | 'domain' AS tag_type, 38 | intersections.tag_id, 39 | intersections.value_id, 40 | intersections.start_ts, 41 | intersections.measured_at, 42 | intersections.end_ts 43 | FROM public.domains AS domains 44 | JOIN intersections ON (domains.domain_id = intersections.entity_id); 45 | ``` 46 | In this case this view simply combines the domain ID and name with the tags 47 | in the intersections table. The required functions are then based on this view: 48 | ``` sql 49 | CREATE FUNCTION tag2domain_get_open_tags() 50 | RETURNS TABLE( 51 | domain_id bigint, 52 | domain_name character varying(100), 53 | tag_type text, 54 | tag_id int, 55 | value_id int, 56 | start_time timestamp with time zone, 57 | measured_at timestamp with time zone, 58 | end_time timestamp with time zone 59 | ) AS $$ 60 | SELECT * FROM v_unified_tags 61 | WHERE (v_unified_tags.end_ts IS NULL) 62 | $$ LANGUAGE SQL STABLE; 63 | ``` 64 | 65 | The simplest way to extend this setup to one with two intersection tables is 66 | to replace the v_unified_tags with a version where multiple intersection tables 67 | are combined through a `UNION ALL`: 68 | ``` sql 69 | CREATE OR REPLACE VIEW v_unified_tags 70 | AS SELECT 71 | domains.domain_id, 72 | domains.domain_name, 73 | 'A' AS tag_type, 74 | intersections_A.tag_id, 75 | intersections_A.value_id, 76 | intersections_A.start_ts, 77 | intersections_A.measured_at, 78 | intersections_A.end_ts 79 | FROM public.domains AS domains 80 | JOIN intersections_A ON (domains.domain_id = intersections_A.entity_id) 81 | UNION ALL SELECT 82 | domains.domain_id, 83 | domains.domain_name, 84 | 'B' AS tag_type, 85 | intersections_B.tag_id, 86 | intersections_B.value_id, 87 | intersections_B.start_ts, 88 | intersections_B.measured_at, 89 | intersections_B.end_ts 90 | FROM public.domains AS domains 91 | JOIN intersections_B ON 
(domains.domain_id = intersections_B.entity_id); 92 | ``` 93 | This allows you to combine information from different tables into a single 94 | view. However, care has to be taken that the _tag_id_s and _value_id_s that 95 | result from these views are consistent with the content of the taxonomy, tags, 96 | and the taxonomy_tag_val tables. 97 | 98 | You may wonder, why the views are wrapped into the additional layer of 99 | functions. The reason is, that the PostgreSQL planner (as of 2021-01-08) does 100 | not optimize over the UNION ALL statements, resulting in large memory 101 | consumption and long run times for the queries required by the API. This 102 | becomes especially noticeable when filters are used. The functions can be 103 | defined in such a way, that the number of domains is reduced before the tables 104 | are appended so that the number of records that have to be loaded is 105 | considerably smaller. 106 | 107 | Which brings us to the topic of filters. 108 | 109 | ## Filters 110 | Filters can be implemented in the *_filtered functions. The simplest way is to 111 | define an additional table as done in the all-in.one demo: 112 | ``` sql 113 | CREATE OR REPLACE VIEW v_tag2domain_domain_filter 114 | AS 115 | -- registrars 116 | SELECT 117 | domains.domain_id AS domain_id, 118 | 'temperature' AS tag_name, 119 | temperatures.start_ts AS start_ts, 120 | temperatures.end_ts AS end_ts, 121 | temperatures.temperature::text AS value 122 | FROM public.temperatures AS temperatures 123 | JOIN public.domains AS domains USING(domain_id) 124 | ; 125 | ``` 126 | Here a v_tag2domain_domain_filter view is defined that contains rows like the 127 | following: 128 | 129 | | domain_id | tag_name | start_ts | end_ts | value | 130 | |-----------|---------------|---------------------|---------------------|-------------| 131 | | 1 | 'temperature' | 2020-12-13T12:56:00 | NULL | 'hot' | 132 | | 2 | 'temperature' | 2020-06-01T06:13:00 | 2020-10-12T18:23:00 | 'cold' | 133 | | ... 
| ... | ... | ... | ... | 134 | 135 | The aim is that domains can be filtered using criteria like 136 | _tag_name = value_. This must be implemented in the *_filtered functions. In 137 | the all-in-one demo this is done like so: 138 | ``` sql 139 | CREATE FUNCTION tag2domain_get_open_tags_filtered(filter_type text, filter_value text) 140 | RETURNS TABLE( 141 | domain_id bigint, 142 | domain_name character varying(100), 143 | tag_type text, 144 | tag_id int, 145 | value_id int, 146 | start_time timestamp with time zone, 147 | measured_at timestamp with time zone, 148 | end_time timestamp with time zone 149 | ) AS $$ 150 | SELECT v_unified_tags.* FROM v_unified_tags 151 | JOIN v_tag2domain_domain_filter USING (domain_id) 152 | WHERE ( 153 | (v_unified_tags.end_ts IS NULL) 154 | AND (v_tag2domain_domain_filter.end_ts IS NULL) 155 | AND (v_tag2domain_domain_filter.tag_name = $1) 156 | AND (v_tag2domain_domain_filter.value = $2) 157 | ) 158 | $$ LANGUAGE SQL STABLE 159 | SET search_path TO :t2d_schema 160 | ; 161 | ``` 162 | 163 | 164 | 165 | ## Schema 166 | ![EER Diagram](../static/schema.svg) 167 | -------------------------------------------------------------------------------- /examples/db/db.config.sh.example: -------------------------------------------------------------------------------- 1 | # name of the schema the tag2domain tables are generated in 2 | export TAG2DOMAIN_SCHEMA=tag2domain 3 | 4 | # Username used to access the database 5 | export POSTGRES_USER= 6 | 7 | # Name of the database the tag2domain database lives in 8 | export POSTGRES_DB= 9 | 10 | # optional parameters 11 | 12 | # use POSTGRES_HOST to specify a database host other than localhost 13 | # export POSTGRES_HOST= 14 | 15 | # use POSTGRES_PORT to specify a port other than 5432 for the database connection 16 | # export POSTGRES_PORT= 17 | 18 | # use the POSTGRES_PASSWORD_FILE option to specify a file that contains a 19 | # password for accessing the database. 
20 | # 21 | # This file should contain a line in this format: 22 | # 23 | # hostname:port:database:username:password 24 | # 25 | # Each of the first four parameters can be * to match everything. Note, that 26 | # access to the file must be restricted by setting chmod 0600 . 27 | # 28 | # See https://www.postgresql.org/docs/current/libpq-pgpass.html for further 29 | # details. 30 | # export POSTGRES_PASSWORD_FILE= 31 | 32 | # convert POSTGRES_PASSWORD_FILE to absolute path 33 | if [ -n "$POSTGRES_PASSWORD_FILE" ]; then 34 | POSTGRES_PASSWORD_FILE="$(readlink -f $POSTGRES_PASSWORD_FILE)" 35 | fi -------------------------------------------------------------------------------- /examples/measurements/README.md: -------------------------------------------------------------------------------- 1 | # Example tag2domain measurements 2 | ## Contents 3 | The json files found in this directory contain example measurements that 4 | can be used together with the example database that is created by the 5 | all-in-one examples. 6 | 7 | A measurement contains the following keys: 8 | + _version_ - the version of the measurement format used (at this time "1" is 9 | the only valid value) 10 | + _tag_type_ - the type of the tag to be set. This type must correspond to one 11 | of the intersection tables that has been configured for the tag2domain 12 | components. 13 | + _tagged_id_ - the ID of the entity to be tagged. This could be the ID of a 14 | domain or of a cluster. 15 | + _taxonomy_ - ID (as integer) or name (as string) of the taxonomy the tags 16 | refer to. 17 | + _producer_ - a string that identifies who or which program has produced the 18 | measurement. Note, that at this time only the same producer can modify a tag. 19 | If a measurement from producer _A_ sets a tag and producer _B_ tries to end it, 20 | the measurement will be rejected. 21 | + _measured_at_ - a timestamp that reflects the time at which the measurement 22 | was taken. The format is `YYYY-mm-ddTHH:MM:SS`. 
23 | + _measurement_id_ (optional) - an ID that identifies the measurement. 24 | + _tags_ - contains a list of tags to be set. Each tag has the following keys: 25 | + _tag_ - ID (as integer) or name (as string) of the tag to be set. 26 | + _value_ (optional) - ID (as integer) or value (as string) to be set. 27 | + _description_ (required if autogenerate_tags = true) - a description of the tag. If a tag is newly 28 | generated this description is set with the tag. If the tag already exists 29 | this value is ignored. 30 | + _extras_ (required if autogenerate_tags = true) - a JSON object that contains further information 31 | about the tag. 32 | + _autogenerate_tags_ - set to true to automatically insert tags that are not 33 | yet defined in the taxonomy. This requires that the taxonomy allows automatic 34 | insertion of tags. 35 | + _autogenerate_values_ - set to true to automatically insert values that are 36 | not yet defined in the taxonomy. This requires that the taxonomy allows 37 | automatic insertion of values. 38 | 39 | Example (`measurement_flavor_salty_strongly.json`): 40 | 41 | ``` json 42 | { 43 | "version": "1", 44 | "tag_type": "domain", 45 | "tagged_id": 3, 46 | "taxonomy": "flavors", 47 | "producer": "test", 48 | "measured_at": "2020-12-23T10:30:51", 49 | "measurement_id": "test/1", 50 | "tags": [ 51 | { 52 | "tag": "salty", 53 | "value": "strongly" 54 | } 55 | ] 56 | } 57 | ``` 58 | 59 | ## Submitting measurements to the REST interface (all-in-one example) 60 | This example assumes that there is a tag2domain-api available where the 61 | MSM2TAG functionality is enabled.
To send the `flavor_salty_strongly` 62 | measurement execute the following command (replace `` with the 63 | address where tag2domain API is running): 64 | ``` bash 65 | API_HOST= 66 | cat measurement_flavor_strongly_salty.json \ 67 | | sed "s/{MEASURED_AT}/$(date +"%Y-%m-%dT%T")/" \ 68 | | curl \ 69 | -X POST "${API_HOST}/api/v1/msm2tag/" \ 70 | -H "accept: application/json" \ 71 | -H "Content-Type: application/json" \ 72 | --data @- 73 | ``` 74 | 75 | The sed command sets the _measured_at_ field of the measurement to the current 76 | time. The resulting tag can be fetched by running 77 | ``` bash 78 | curl \ 79 | -X GET "${API_HOST}/api/v1/bydomain/domain_test3.at?limit=1000" \ 80 | -H "accept: application/json" 81 | ``` 82 | 83 | Alternatively you can test the REST interface in your browser by visiting 84 | `\/docs`. 85 | 86 | ## Submitting measurements to a kafka topic (all-in-one-kafka example) 87 | For this method of submitting measurements a [kafka](https://kafka.apache.org/) 88 | setup must be configured and the `kafka-console-producer.sh` must be accessible 89 | in the _PATH_ environment. The tools are included in the `bin/` folder of the 90 | kafka package. 91 | 92 | To send a measurement, first configure the kafka broker and topic to send to: 93 | ``` bash 94 | export KAFKA_BROKER= 95 | export KAFKA_TOPIC= 96 | ``` 97 | These options should match the broker and topic that the msm2tag2domain 98 | service is listening to. 
The measurement can then be submitted using the 99 | script 100 | ``` bash 101 | submit_measurement-kafka.sh measurement_flavor_strongly_salty.json 102 | ``` 103 | 104 | The resulting tag can be fetched by running this command: 105 | ``` bash 106 | API_HOST= 107 | curl \ 108 | -X GET "${API_HOST}/api/v1/bydomain/domain_test3.at?limit=1000" \ 109 | -H "accept: application/json" 110 | ``` -------------------------------------------------------------------------------- /examples/measurements/measurement_color_empty.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1", 3 | "tag_type": "domain", 4 | "tagged_id": 3, 5 | "taxonomy": "flavors", 6 | "producer": "test", 7 | "measured_at": "{MEASURED_AT}", 8 | "measurement_id": "test/12345", 9 | "tags": [] 10 | } 11 | -------------------------------------------------------------------------------- /examples/measurements/measurement_color_rgb_red.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1", 3 | "tag_type": "domain", 4 | "tagged_id": 3, 5 | "taxonomy": "colors", 6 | "producer": "test", 7 | "measured_at": "{MEASURED_AT}", 8 | "measurement_id": "test/12345", 9 | "tags": [ 10 | { 11 | "tag": "rgb::red" 12 | } 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /examples/measurements/measurement_flavor_empty.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1", 3 | "tag_type": "domain", 4 | "tagged_id": 3, 5 | "taxonomy": "flavors", 6 | "producer": "test", 7 | "measured_at": "{MEASURED_AT}", 8 | "measurement_id": "test/12345", 9 | "tags": [] 10 | } 11 | -------------------------------------------------------------------------------- /examples/measurements/measurement_flavor_salty_strongly.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1", 3 
#!/bin/bash
# Submit a single measurement JSON file to the configured kafka topic.
#
# Usage: submit_measurement-kafka.sh <measurement file>
#
# Requires KAFKA_BROKER and KAFKA_TOPIC in the environment and
# kafka-console-producer.sh in the PATH. The {MEASURED_AT} placeholder in
# the measurement file is replaced with the current timestamp, and the
# document is flattened to a single line before submission.

infile="$1"

if [ -z "$infile" ]; then
    >&2 echo "ERROR: not enough arguments. Usage: $0 <measurement file>"
    exit 1
fi

if [ -z "$KAFKA_BROKER" ]; then
    >&2 echo "KAFKA_BROKER is not set"
    exit 3
fi

# BUGFIX: this guard previously re-tested KAFKA_BROKER, so an unset
# KAFKA_TOPIC slipped through and broke the producer invocation below.
if [ -z "$KAFKA_TOPIC" ]; then
    >&2 echo "KAFKA_TOPIC is not set"
    exit 4
fi

KAFKA_PRODUCER_BIN="$(which kafka-console-producer.sh)"
KAFKA_PRODUCER_ARGS="--broker-list $KAFKA_BROKER --topic $KAFKA_TOPIC"

if [ ! -x "$KAFKA_PRODUCER_BIN" ]; then
    >&2 echo "ERROR: could not find kafka producer script or it is not executable"
    exit 2
fi

t=$(date +"%Y-%m-%dT%T")

cat "$infile" | sed "s/{MEASURED_AT}/$t/" | tr '\n' ' ' | "$KAFKA_PRODUCER_BIN" $KAFKA_PRODUCER_ARGS
leads 26 | values: *color_values # reuse values of red -------------------------------------------------------------------------------- /msm2tag2domain/app/msm2tag2domain.cfg: -------------------------------------------------------------------------------- 1 | [db] 2 | host= 3 | dbname= 4 | user= 5 | password= 6 | schema= 7 | 8 | [logging] 9 | level=INFO 10 | 11 | [tag2domain] 12 | max_measurement_age=60 13 | 14 | [kafka] 15 | topic_name=msm2tag2domain.measurements 16 | group_id=msm2tag2domain_group 17 | bootstrap_servers=atbot-kafka.labs.nic.at:9092 18 | -------------------------------------------------------------------------------- /msm2tag2domain/app/msm2tag2domain.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | import sys 4 | import configparser 5 | import argparse 6 | import logging 7 | import traceback 8 | import datetime 9 | 10 | import json 11 | from kafka import KafkaConsumer 12 | 13 | from py_tag2domain.msm2tags import MeasurementToTags 14 | from py_tag2domain.db import Psycopg2Adapter 15 | from py_tag2domain.util import parse_config 16 | import py_tag2domain.exceptions 17 | 18 | LOG_LEVELS = { 19 | "debug": logging.DEBUG, 20 | "info": logging.INFO, 21 | "warning": logging.WARNING, 22 | "error": logging.ERROR 23 | } 24 | 25 | 26 | def error(s, exit_code=1): 27 | logging.error(s) 28 | sys.exit(exit_code) 29 | 30 | 31 | class KafkaLooper(object): 32 | def __init__( 33 | self, 34 | config, 35 | msm_handler, 36 | result_handler=None, 37 | logger=logging.getLogger() 38 | ): 39 | self.logger = logger 40 | self.msm_handler = msm_handler 41 | self.result_handler = result_handler 42 | 43 | # check config 44 | try: 45 | kafka_config = config["kafka"] 46 | except KeyError: 47 | error("could not find 'kafka' section in config file") 48 | 49 | for _option in [ 50 | "topic_name", 51 | "group_id", 52 | "bootstrap_servers", 53 | "client_id" 54 | ]: 55 | if not 
    def loop(self):
        """Consume measurements from the kafka topic indefinitely.

        For each message: decode it as UTF-8, parse it as JSON and pass the
        resulting measurement dict to ``self.msm_handler``. The offset is
        committed only after the handler reports success, so a measurement
        whose handling failed is re-delivered after a restart. Messages that
        cannot be decoded or parsed are logged and skipped without
        committing.
        """
        self.logger.info("startup finished, waiting for kafka events")
        for msg in self.consumer:
            self.logger.info("received kafka message")
            # NOTE(review): msg.value is normally bytes (no value_deserializer
            # is configured for the consumer), so this comparison against a
            # str literal can never be true in Python 3 -- confirm intent.
            if msg.value == 'json failed to parse':
                self.logger.warning("received json failed to parse")
                self.consumer.commit()
                continue
            try:
                msg_decoded = msg.value.decode('utf-8', 'ignore')
            except ValueError:
                # not committed: the message will be re-delivered
                self.logger.warning("Could not decode msg: {}".format(msg))
                continue

            try:
                measurement = json.loads(msg_decoded)
            except ValueError:
                self.logger.warning(
                    "could not parse json: {}".format(msg_decoded)
                )
                continue

            success, result = self.msm_handler(measurement)
            if success:
                self.logger.debug(
                    "handled measurement successfully - committing"
                )
                self.consumer.commit()

            if self.result_handler is not None:
                self.result_handler(success, measurement, result)

            # failed measurements are simply left uncommitted
            if not success:
                continue
class StreamLooper(object):
    """Reads measurements from a text stream (file or stdin).

    The stream contains JSON measurement documents separated by the
    KEYSTRING marker. Each document is parsed and passed to msm_handler;
    an optional result_handler receives (success, measurement, result)
    after each measurement.
    """

    # separator between two consecutive measurements in the stream
    KEYSTRING = "--**--SEPARATOR-52579864--**--"

    def __init__(
        self,
        stream,
        msm_handler,
        result_handler=None,
        logger=logging.getLogger()
    ):
        """
        :param stream: text stream to read measurements from
        :param msm_handler: callable(measurement) -> (success, result)
        :param result_handler: optional callable(success, measurement, result)
        :param logger: logger used for progress/warning messages
        """
        self.logger = logger
        self.msm_handler = msm_handler
        self.stream = stream
        self.result_handler = result_handler

    def get_msms(stream):
        """Yield the chunks of *stream* delimited by KEYSTRING.

        The trailing chunk after the last separator is yielded as well,
        even if it is empty (callers treat unparseable chunks as warnings).
        """
        keystring = __class__.KEYSTRING
        keystring_len = len(keystring)
        read_string = ""
        char = stream.read(1)
        while char != '':
            read_string += char
            # BUGFIX: compare the *last* keystring_len characters against the
            # separator. The original sliced from the front
            # (read_string[keystring_len:]), which only matched when a chunk
            # happened to be exactly keystring_len characters long, so
            # separators were effectively never detected.
            if (
                len(read_string) >= keystring_len
                and read_string[-keystring_len:] == keystring
            ):
                yield read_string[:-keystring_len]
                read_string = ""
            char = stream.read(1)
        yield read_string

    def loop(self):
        """Read all measurements from the stream and dispatch each one."""
        self.logger.info("startup finished, reading measurements")
        for msg in __class__.get_msms(self.stream):
            self.logger.info("read message")
            if msg == 'json failed to parse':
                self.logger.warning("received json failed to parse")
                continue
            try:
                measurement = json.loads(msg)
            except ValueError:
                self.logger.warning("Could not parse json: {}".format(msg))
                continue

            success, result = self.msm_handler(measurement)
            if success:
                self.logger.debug("handled measurement successfully")
            else:
                self.logger.debug("handling of measurement failed")

            if self.result_handler is not None:
                self.result_handler(success, measurement, result)
measurement file %s - %s" % ( 190 | config.file, 191 | str(e) 192 | )) 193 | looper = StreamLooper(f, msm_handler, result_handler=result_handler) 194 | elif args.kafka: 195 | looper = KafkaLooper( 196 | config, 197 | msm_handler, 198 | result_handler=result_handler 199 | ) 200 | else: 201 | error("no measurement source specified") 202 | 203 | return looper 204 | 205 | 206 | def run(args, config): 207 | # set up database connection 208 | db_logger = logging.getLogger() 209 | db_config, intxn_table_mappings = parse_config(config) 210 | db_pars = Psycopg2Adapter.to_psycopg_args(db_config) 211 | 212 | if db_config is None: 213 | error("could not read DB configuration") 214 | 215 | if intxn_table_mappings is None: 216 | error("table mappings are not defined") 217 | 218 | try: 219 | db_adapter = Psycopg2Adapter( 220 | db_pars, 221 | intxn_table_mappings, 222 | logger=db_logger 223 | ) 224 | except py_tag2domain.exceptions.AdapterConnectionException as e: 225 | error("could not connect to database - %s" % str(e)) 226 | 227 | # create MeasruementToTags object 228 | msm2tags_logger = logging.getLogger() 229 | max_measurement_age_int = config.getint( 230 | "tag2domain", 231 | "max_measurement_age", 232 | fallback=None 233 | ) 234 | if max_measurement_age_int is None: 235 | max_measurement_age = None 236 | logging.info( 237 | "setting up MeasurementToTags without a max measurement age" 238 | ) 239 | else: 240 | max_measurement_age = datetime.timedelta( 241 | minutes=max_measurement_age_int 242 | ) 243 | logging.info( 244 | "setting up MeasurementToTags with max measurement " 245 | "age of %i minutes" % ( 246 | max_measurement_age_int 247 | ) 248 | ) 249 | msm2tags = MeasurementToTags( 250 | db_adapter, 251 | logger=msm2tags_logger, 252 | max_measurement_age=max_measurement_age 253 | ) 254 | 255 | def msm_handler(msm): 256 | try: 257 | result = msm2tags.handle_measurement(msm) 258 | except py_tag2domain.exceptions.InvalidMeasurementException as e: 259 | 
logging.warning("invalid measurement - %s" % str(e)) 260 | raise 261 | except py_tag2domain.exceptions.StaleMeasurementException as e: 262 | logging.warning("stale measurement - %s" % str(e)) 263 | return True, None 264 | except Exception as e: 265 | bt = traceback.format_exc() 266 | error("unknown exception - %s\n%s" % (str(e), bt)) 267 | 268 | return True, result 269 | 270 | msm_looper = get_msm_looper(args, config, msm_handler) 271 | 272 | msm_looper.loop() 273 | 274 | logging.info("all measurements consumed - exiting") 275 | 276 | 277 | if __name__ == '__main__': 278 | parser = argparse.ArgumentParser( 279 | description="measurement to tag2domain script" 280 | ) 281 | 282 | parser.add_argument( 283 | "config", 284 | type=str, 285 | help="path of config file" 286 | ) 287 | 288 | input_group = parser.add_mutually_exclusive_group(required=True) 289 | 290 | input_group.add_argument( 291 | "-in", 292 | "--stdin", 293 | action="store_true" 294 | ) 295 | 296 | input_group.add_argument( 297 | "-f", 298 | "--file", 299 | type=str, 300 | help="read measurements from a file", 301 | default=False 302 | ) 303 | 304 | input_group.add_argument( 305 | "-k", 306 | "--kafka", 307 | action="store_true" 308 | ) 309 | 310 | logging.basicConfig(level=logging.INFO) 311 | 312 | args = parser.parse_args() 313 | 314 | logging.info("reading config file %s" % args.config) 315 | 316 | config = configparser.ConfigParser() 317 | try: 318 | keys = config.read(args.config) 319 | if len(keys) == 0: 320 | error("could not read config") 321 | except Exception as e: 322 | error("could not read config file - %s" % (str(e))) 323 | 324 | loglevel_str = config.get("logging", "level", fallback="info").lower() 325 | try: 326 | loglevel = LOG_LEVELS[loglevel_str] 327 | except KeyError: 328 | error("unknown logging level '%s' configured" % loglevel_str) 329 | logging.getLogger().setLevel(loglevel) 330 | 331 | run(args, config) 332 | 
-------------------------------------------------------------------------------- /msm2tag2domain/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-alpine 2 | 3 | # Combine run command to create single intermediate image layer 4 | RUN mkdir -p /usr/src/app \ 5 | && cd /usr/src/app \ 6 | # Installing runtime dependencies 7 | && apk --no-cache add \ 8 | curl \ 9 | # Installing buildtime dependencies. They will be removed at end of this 10 | # commands sequence. 11 | && apk --no-cache add --virtual build-dependencies \ 12 | build-base \ 13 | && apk --no-cache --update add postgresql-dev \ 14 | # Updating pip itself before installing packages from requirements.txt 15 | && pip install --no-cache-dir pip setuptools 16 | 17 | # copy py_tag2domain requirements.txt with its dependencies 18 | COPY py_tag2domain/requirements.txt /usr/src/app/requirements.txt 19 | # Installing pip packages from requirements.txt 20 | RUN cd /usr/src/app \ 21 | && pip install --no-cache-dir -r requirements.txt \ 22 | # Removing build dependencies leaving image layer clean and neat 23 | && apk del build-dependencies 24 | 25 | # move py_tag2domain library into the right place 26 | COPY py_tag2domain /usr/src/app/py_tag2domain 27 | 28 | # move the executable into place 29 | COPY msm2tag2domain/app/msm2tag2domain.py /usr/src/app/msm2tag2domain.py 30 | 31 | WORKDIR /usr/src/app 32 | 33 | # run command 34 | CMD ["python", "msm2tag2domain.py", "--kafka", "msm2tag2domain.cfg"] 35 | 36 | -------------------------------------------------------------------------------- /msm2tag2domain/docker/msm2tag2domain.cfg.example: -------------------------------------------------------------------------------- 1 | [db] 2 | host=db 3 | user= 4 | password= 5 | dbname=tag2domain_mock_db 6 | sslmode=prefer 7 | schema=tag2domain 8 | 9 | [logging] 10 | level=INFO 11 | 12 | [tag2domain] 13 | max_measurement_age=360 14 | 15 | [kafka] 16 | topic_name= 17 | 
group_id= 18 | bootstrap_servers= 19 | client_id=kafka-python-msm2tag2domain 20 | 21 | [db.intxn_table.domain] 22 | table_name=intersections 23 | id=entity_id 24 | taxonomy_id=taxonomy_id 25 | tag_id=tag_id 26 | value_id=value_id 27 | measured_at=measured_at 28 | producer=producer 29 | start_date=start_date 30 | end_date=end_date 31 | start_ts=start_ts 32 | end_ts=end_ts 33 | -------------------------------------------------------------------------------- /py_tag2domain/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/certtools/tag2domain/09386ac1f1935b06a7d136eed96973ac824e91be/py_tag2domain/__init__.py -------------------------------------------------------------------------------- /py_tag2domain/db_statements.py: -------------------------------------------------------------------------------- 1 | _d = {} 2 | db_statements = _d 3 | 4 | _d["get_taxonomy_intersections"] = ( 5 | "SELECT %(__all_fields__)s FROM %(table_name)s WHERE (%(taxonomy_id)s = %%s)" 6 | ) 7 | 8 | _d["get_open_tags"] = ( 9 | """ 10 | SELECT 11 | %(tag_id)s AS tag_id, 12 | %(value_id)s AS value_id, 13 | %(measured_at)s AS measured_at, 14 | %(producer)s AS producer 15 | FROM %(table_name)s 16 | WHERE 17 | (%(id)s = %%s) 18 | AND (%(taxonomy_id)s = %%s) 19 | AND (%(end_date)s IS NULL) 20 | AND (%(end_ts)s IS NULL) 21 | """ 22 | ) 23 | 24 | _d["get_all_tags"] = ( 25 | """ 26 | SELECT 27 | %(tag_id)s AS tag_id, 28 | %(value_id)s AS value_id, 29 | %(start_ts)s AS start_ts, 30 | %(measured_at)s AS measured_at, 31 | %(end_ts)s AS end_ts, 32 | %(producer)s AS producer 33 | FROM %(table_name)s 34 | WHERE 35 | (%(id)s = %%s) 36 | AND (%(taxonomy_id)s = %%s) 37 | """ 38 | ) 39 | 40 | _d["insert_intersections"] = ( 41 | """ 42 | INSERT INTO %(table_name)s 43 | (%(id)s, %(tag_id)s, %(start_date)s, %(end_date)s, %(taxonomy_id)s, 44 | %(value_id)s, %(measured_at)s, %(start_ts)s, %(end_ts)s, %(producer)s) 45 | VALUES (%%s, %%s, %%s, 
%%s, %%s, %%s, %%s, %%s, %%s, %%s) 46 | """ 47 | ) 48 | 49 | _d["prolong_intersections_no_value"] = ( 50 | """ 51 | UPDATE %(table_name)s 52 | SET 53 | %(measured_at)s = %%s, 54 | %(producer)s = %%s 55 | WHERE 56 | (%(id)s = %%s) 57 | AND (%(taxonomy_id)s = %%s) 58 | AND (%(tag_id)s = %%s) 59 | AND (%(value_id)s IS NULL) 60 | AND (%(end_date)s IS NULL) 61 | AND (%(end_ts)s IS NULL) 62 | """ 63 | ) 64 | 65 | _d["prolong_intersections_w_value"] = ( 66 | """ 67 | UPDATE %(table_name)s 68 | SET 69 | %(measured_at)s = %%s, 70 | %(producer)s = %%s 71 | WHERE 72 | (%(id)s = %%s) 73 | AND (%(taxonomy_id)s = %%s) 74 | AND (%(tag_id)s = %%s) 75 | AND (%(value_id)s = %%s) 76 | AND (%(end_date)s IS NULL) 77 | AND (%(end_ts)s IS NULL) 78 | """ 79 | ) 80 | 81 | _d["end_intersection_no_value"] = ( 82 | """ 83 | UPDATE %(table_name)s 84 | SET 85 | %(measured_at)s = %%s, 86 | %(end_date)s = %%s, 87 | %(end_ts)s = %%s, 88 | %(producer)s = %%s 89 | WHERE 90 | (%(id)s = %%s) 91 | AND (%(taxonomy_id)s = %%s) 92 | AND (%(tag_id)s = %%s) 93 | AND (%(value_id)s IS NULL) 94 | AND (%(end_date)s IS NULL) 95 | AND (%(end_ts)s IS NULL) 96 | """ 97 | ) 98 | 99 | _d["end_intersection_w_value"] = ( 100 | """ 101 | UPDATE %(table_name)s 102 | SET 103 | %(measured_at)s = %%s, 104 | %(end_date)s = %%s, 105 | %(end_ts)s = %%s, 106 | %(producer)s = %%s 107 | WHERE 108 | (%(id)s = %%s) 109 | AND (%(taxonomy_id)s = %%s) 110 | AND (%(tag_id)s = %%s) 111 | AND (%(value_id)s = %%s) 112 | AND (%(end_date)s IS NULL) 113 | AND (%(end_ts)s IS NULL) 114 | """ 115 | ) 116 | -------------------------------------------------------------------------------- /py_tag2domain/exceptions.py: -------------------------------------------------------------------------------- 1 | class AdapterConnectionException(Exception): 2 | pass 3 | 4 | 5 | class AdapterDBError(Exception): 6 | pass 7 | 8 | 9 | class InvalidMeasurementException(Exception): 10 | pass 11 | 12 | 13 | class DisallowedTaxonomyModificationException(Exception): 
14 | pass 15 | 16 | 17 | class InconsistentTaxonomyException(Exception): 18 | pass 19 | 20 | 21 | class StaleMeasurementException(Exception): 22 | pass 23 | -------------------------------------------------------------------------------- /py_tag2domain/requirements.txt: -------------------------------------------------------------------------------- 1 | jsonschema==3.2.0 2 | kafka-python==2.0.2 3 | parameterized==0.7.4 4 | psycopg2-binary>=2.8 5 | pytz==2020.1 6 | rope==0.17.0 7 | six==1.15.0 8 | requests>=2.25.1 9 | -------------------------------------------------------------------------------- /py_tag2domain/schema/measurement.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "required": [ 4 | "version", 5 | "tag_type", 6 | "tagged_id", 7 | "taxonomy", 8 | "producer", 9 | "measured_at", 10 | "tags" 11 | ], 12 | "additionalProperties": false, 13 | "properties": { 14 | "version": { 15 | "type": "string", 16 | "enum": ["1"] 17 | }, 18 | "tag_type": { 19 | "type": "string" 20 | }, 21 | "tagged_id": { 22 | "type": "integer" 23 | }, 24 | "taxonomy": { 25 | "oneOf": [ 26 | {"type": "integer"}, 27 | {"type": "string", "minLength": 1} 28 | ] 29 | }, 30 | "producer": { 31 | "type": "string", 32 | "minLength": 1 33 | }, 34 | "measured_at": { 35 | "type": "string", 36 | "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(?:\\.[0-9]+)?$" 37 | }, 38 | "tags": { 39 | "type": "array", 40 | "items": { 41 | "type": "object", 42 | "additionalProperties": false, 43 | "required": ["tag"], 44 | "properties": { 45 | "tag": { 46 | "oneOf": [ 47 | {"type": "integer"}, 48 | {"type": "string", "minLength": 1} 49 | ] 50 | }, 51 | "value": { 52 | "oneOf": [ 53 | {"type": "integer"}, 54 | {"type": "string", "minLength": 1} 55 | ] 56 | }, 57 | "description": {"type": "string"}, 58 | "extras": { 59 | "type": "object", 60 | "additionalProperties": true 61 | } 62 | } 63 | } 64 | }, 65 | "measurement_id": { 
"type": "string" }, 66 | "autogenerate_tags": {"type": "boolean"}, 67 | "autogenerate_values": {"type": "boolean"} 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /py_tag2domain/schema/measurement.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations # noqa: F407 2 | from typing import Any, Dict, List, Optional, Union 3 | from pydantic import BaseModel, constr 4 | 5 | 6 | class Tag(BaseModel): 7 | tag: Union[int, str] 8 | value: Optional[Union[int, str]] 9 | description: Optional[str] 10 | extras: Optional[Dict[str, Any]] 11 | 12 | 13 | class MeasurementModel(BaseModel): 14 | version: str 15 | tag_type: str 16 | tagged_id: int 17 | taxonomy: Union[int, str] 18 | producer: constr(min_length=1) 19 | measured_at: constr( 20 | regex='^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(?:\.[0-9]+)?$' # noqa: W605, F722 21 | ) 22 | tags: List[Tag] 23 | measurement_id: Optional[str] 24 | autogenerate_tags: Optional[bool] 25 | autogenerate_values: Optional[bool] 26 | -------------------------------------------------------------------------------- /py_tag2domain/util.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import configparser 3 | import re 4 | import logging 5 | from psycopg2.tz import FixedOffsetTimezone 6 | 7 | logger = logging.getLogger(__file__) 8 | 9 | RE_INTERSECTION_TABLE_MAPPING_SECTION = r"db\.intxn_table\.(.+)" 10 | INTXN_TABLE_MAPPING_KEYS_REQUIRED = [ 11 | "table_name", 12 | "id", 13 | "taxonomy_id", 14 | "tag_id", 15 | "value_id", 16 | "measured_at", 17 | "producer", 18 | "start_date", 19 | "end_date", 20 | "start_ts", 21 | "end_ts" 22 | ] 23 | 24 | 25 | def _parse_db_connection(config_object): 26 | try: 27 | db_config = config_object["db"] 28 | print(db_config) 29 | tag2domain_db_config = dict( 30 | DBUSER=db_config["user"], 31 | DBHOST=db_config["host"], 32 | 
DATABASE=db_config["dbname"], 33 | DBPASSWORD=db_config["password"], 34 | DBPORT=db_config.getint("port", fallback=5432), 35 | DBSSLMODE=db_config.get("sslmode", fallback="require"), 36 | DBSCHEMA=db_config.get( 37 | "schema", 38 | fallback="public" 39 | ), 40 | DBTAG2DOMAIN_SCHEMA=db_config.get( 41 | "schema", 42 | fallback="public" 43 | ), 44 | DBAPPLICATION_NAME=db_config.get( 45 | "application_name", 46 | fallback="tag2domain_test" 47 | ) 48 | ) 49 | except KeyError as e: 50 | tag2domain_db_config = None 51 | logger.error( 52 | "could not find required key '%s' in db config" % (str(e)) 53 | ) 54 | return tag2domain_db_config 55 | 56 | 57 | def _part_intxn_table_mapping(config_object): 58 | mapping = {} 59 | try: 60 | for key in INTXN_TABLE_MAPPING_KEYS_REQUIRED: 61 | mapping[key] = config_object[key] 62 | except KeyError as e: 63 | raise ValueError( 64 | "could not find required key '%s' in intxn table mapping" % str(e) 65 | ) 66 | return mapping 67 | 68 | 69 | def _parse_intxn_table_mappings(config_object): 70 | intxn_table_mappings = {} 71 | for _section in config_object.keys(): 72 | m = re.match(RE_INTERSECTION_TABLE_MAPPING_SECTION, _section) 73 | if m: 74 | intxn_table_mappings[m.group(1).strip()] = \ 75 | _part_intxn_table_mapping(config_object[_section]) 76 | return intxn_table_mappings 77 | 78 | 79 | def parse_config(configfile): 80 | if isinstance(configfile, str): 81 | print("reading configfile " + configfile) 82 | config = configparser.ConfigParser() 83 | config.read(configfile) 84 | elif isinstance(configfile, configparser.ConfigParser): 85 | config = configfile 86 | if len(config.keys()) > 0: 87 | tag2domain_db_config = _parse_db_connection(config) 88 | intxn_table_mappings = _parse_intxn_table_mappings(config) 89 | else: 90 | tag2domain_db_config = None 91 | intxn_table_mappings = None 92 | 93 | return tag2domain_db_config, intxn_table_mappings 94 | 95 | 96 | def calc_changes(from_, to): 97 | """ 98 | Calculate the label changes required to change 
from the set of labels from_ 99 | to the set of labels to. 100 | 101 | For use with function :func:`tag2domain_execute_changes`. 102 | 103 | Returns 104 | ------- 105 | dict : keys 'insert', 'prolong', 'end' -> list 106 | keys indicate the action that is required and the lists contain the 107 | tags the action has to be applied to. 108 | """ 109 | changes = { 110 | 'insert': tuple(sorted(to - from_)), 111 | 'prolong': tuple(sorted(to & from_)), 112 | 'end': tuple(sorted(from_ - to)), 113 | } 114 | return changes 115 | 116 | 117 | def parse_timestamp(ts): 118 | """ 119 | Takes a timestamp in format "%Y-%m-%dT%H:%M:%S(.$d)" with or 120 | without microseconds and returns the time as datetime.datetime. 121 | """ 122 | try: 123 | dt = datetime.datetime.strptime( 124 | ts, 125 | "%Y-%m-%dT%H:%M:%S.%f" 126 | ) 127 | except ValueError: 128 | try: 129 | # Retry without microseconds 130 | dt = datetime.datetime.strptime( 131 | ts, 132 | "%Y-%m-%dT%H:%M:%S" 133 | ) 134 | except ValueError as e: 135 | raise ValueError("could not parse timestamp - %s" % str(e)) 136 | 137 | return dt.replace(tzinfo=FixedOffsetTimezone(offset=0, name=None)) 138 | -------------------------------------------------------------------------------- /scripts/db/create_glue.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | 5 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 6 | cd "$SCRIPT_DIR" 7 | 8 | function error_exit { 9 | >&2 echo "ERROR: $1" 10 | exit 1 11 | } 12 | 13 | if [ -z "$POSTGRES_USER" ]; then 14 | error_exit "POSTGRES_USER is not set" 15 | fi 16 | 17 | if [ -z "$POSTGRES_DB" ]; then 18 | error_exit "POSTGRES_DB is not set" 19 | fi 20 | 21 | if [ -z "$TAG2DOMAIN_SCHEMA" ]; then 22 | error_exit "TAG2DOMAIN_SCHEMA is not set" 23 | fi 24 | 25 | if [ -z "$TAG2DOMAIN_INTXN_TABLE_NAME" ]; then 26 | error_exit "TAG2DOMAIN_INTXN_TABLE_NAME is not set" 27 | fi 28 | 29 | if [ -z 
"$TAG2DOMAIN_ENTITY_TABLE" ]; then 30 | error_exit "TAG2DOMAIN_ENTITY_TABLE is not set" 31 | fi 32 | 33 | if [ -z "$TAG2DOMAIN_ENTITY_ID_COLUMN" ]; then 34 | error_exit "TAG2DOMAIN_ENTITY_ID_COLUMN is not set" 35 | fi 36 | 37 | if [ -z "$TAG2DOMAIN_ENTITY_NAME_COLUMN" ]; then 38 | error_exit "TAG2DOMAIN_ENTITY_NAME_COLUMN is not set" 39 | fi 40 | 41 | if [ -z "$TAG2DOMAIN_TAG_TYPE" ]; then 42 | error_exit "TAG2DOMAIN_TAG_TYPE is not set" 43 | fi 44 | 45 | if [[ $TAG2DOMAIN_TAG_TYPE == *"."* ]]; then 46 | error_exit " can not contain a ." 47 | fi 48 | 49 | function run_psql_script { 50 | psql \ 51 | -v ON_ERROR_STOP=1 \ 52 | -v t2d_schema="$TAG2DOMAIN_SCHEMA" \ 53 | -v t2d_intxn_table_name="$TAG2DOMAIN_INTXN_TABLE_NAME" \ 54 | -v t2d_entity_table="$TAG2DOMAIN_ENTITY_TABLE" \ 55 | -v t2d_entity_id_column="$TAG2DOMAIN_ENTITY_ID_COLUMN" \ 56 | -v t2d_entity_name_column="$TAG2DOMAIN_ENTITY_NAME_COLUMN" \ 57 | -v t2d_tag_type="$TAG2DOMAIN_TAG_TYPE" \ 58 | --username "$POSTGRES_USER" \ 59 | --dbname "$POSTGRES_DB" \ 60 | -f "$1" 61 | } 62 | 63 | if [ -n "$POSTGRES_HOST" ]; then 64 | export PGHOST="$POSTGRES_HOST" 65 | fi 66 | 67 | if [ -n "$POSTGRES_PORT" ]; then 68 | export PGPORT="$POSTGRES_PORT" 69 | fi 70 | 71 | if [ -n "$POSTGRES_PASSWORD_FILE" ]; then 72 | export PGPASSFILE="$POSTGRES_PASSWORD_FILE" 73 | fi 74 | 75 | run_psql_script "create_glue.sql" -------------------------------------------------------------------------------- /scripts/db/create_glue.sql: -------------------------------------------------------------------------------- 1 | CREATE SCHEMA IF NOT EXISTS :t2d_schema; 2 | SET search_path TO :t2d_schema; 3 | 4 | -- creates the view that is used to retrieve tags 5 | CREATE OR REPLACE VIEW v_unified_tags 6 | AS SELECT 7 | :t2d_entity_table.:t2d_entity_id_column AS domain_id, 8 | :t2d_entity_table.:t2d_entity_name_column AS domain_name, 9 | ':t2d_tag_type' AS tag_type, 10 | :t2d_intxn_table_name.tag_id, 11 | :t2d_intxn_table_name.value_id, 12 | 
:t2d_intxn_table_name.start_ts, 13 | :t2d_intxn_table_name.measured_at, 14 | :t2d_intxn_table_name.end_ts 15 | FROM :t2d_entity_table 16 | JOIN :t2d_intxn_table_name ON (:t2d_entity_table.:t2d_entity_id_column = :t2d_intxn_table_name.entity_id); 17 | 18 | -- creates a filter table (this is an empty place holder only) 19 | CREATE TABLE v_tag2domain_domain_filter ( 20 | domain_id bigint NOT NULL, 21 | tag_name character varying(200) NOT NULL, 22 | start_ts timestamp with time zone NOT NULL, 23 | end_ts timestamp with time zone NOT NULL, 24 | value text 25 | ); 26 | 27 | -- ---------------------------------------------------------------------------- 28 | -- SQL functions used to access the intersection tables 29 | -- ---------------------------------------------------------------------------- 30 | 31 | DROP FUNCTION IF EXISTS tag2domain_get_open_tags; 32 | -- function tag2domain_get_open_tags() 33 | -- returns a table with all currently open tags 34 | CREATE FUNCTION tag2domain_get_open_tags() 35 | RETURNS TABLE( 36 | domain_id bigint, 37 | domain_name character varying(100), 38 | tag_type text, 39 | tag_id int, 40 | value_id int, 41 | start_time timestamp with time zone, 42 | measured_at timestamp with time zone, 43 | end_time timestamp with time zone 44 | ) AS $$ 45 | SELECT * FROM v_unified_tags 46 | WHERE (v_unified_tags.end_ts IS NULL) 47 | $$ LANGUAGE SQL STABLE 48 | SET search_path TO :t2d_schema 49 | ; 50 | 51 | DROP FUNCTION IF EXISTS tag2domain_get_open_tags_filtered; 52 | -- function tag2domain_get_open_tags_filtered(filter_type, filter_value) 53 | -- 54 | -- returns a table with all currently open tags for all domains that filtered 55 | -- through the v_tag2domain_domain_filter filter tables. 56 | -- 57 | -- filter_type references the tag_name column and filter_value the value column of the 58 | -- v_tag2domain_domain_filter table. A domain passes if a row with 59 | -- tag_name=filter_type AND value=filter_value 60 | -- exists. 
61 | CREATE FUNCTION tag2domain_get_open_tags_filtered(filter_type text, filter_value text) 62 | RETURNS TABLE( 63 | domain_id bigint, 64 | domain_name character varying(100), 65 | tag_type text, 66 | tag_id int, 67 | value_id int, 68 | start_time timestamp with time zone, 69 | measured_at timestamp with time zone, 70 | end_time timestamp with time zone 71 | ) AS $$ 72 | SELECT v_unified_tags.* FROM v_unified_tags 73 | JOIN v_tag2domain_domain_filter USING (domain_id) 74 | WHERE ( 75 | (v_unified_tags.end_ts IS NULL) 76 | AND (v_tag2domain_domain_filter.end_ts IS NULL) 77 | AND (v_tag2domain_domain_filter.tag_name = $1) 78 | AND (v_tag2domain_domain_filter.value = $2) 79 | ) 80 | $$ LANGUAGE SQL STABLE 81 | SET search_path TO :t2d_schema 82 | ; 83 | 84 | DROP FUNCTION IF EXISTS tag2domain_get_tags_at_time; 85 | -- function tag2domain_get_tags_at_time(at_time) 86 | -- Returns all tags that were open at time at_time. 87 | CREATE FUNCTION tag2domain_get_tags_at_time(at_time timestamp) 88 | RETURNS TABLE( 89 | domain_id bigint, 90 | domain_name character varying(100), 91 | tag_type text, 92 | tag_id int, 93 | value_id int, 94 | start_time timestamp with time zone, 95 | measured_at timestamp with time zone, 96 | end_time timestamp with time zone 97 | ) AS $$ 98 | SELECT v_unified_tags.* FROM v_unified_tags 99 | WHERE ( 100 | (v_unified_tags.start_ts <= $1) 101 | AND ((v_unified_tags.end_ts > $1) OR (v_unified_tags.end_ts IS NULL)) 102 | ) 103 | $$ LANGUAGE SQL STABLE 104 | SET search_path TO :t2d_schema 105 | ; 106 | 107 | DROP FUNCTION IF EXISTS tag2domain_get_tags_at_time_filtered; 108 | -- function tag2domain_get_tags_at_time_filtered(at_time, filter_type, filter_value) 109 | -- 110 | -- returns a table with all tags that were open at time at_time for all domains that 111 | -- filtered through the v_tag2domain_domain_filter filter tables. 
--
-- filter_type references the tag_name column and filter_value the value column of the
-- v_tag2domain_domain_filter table. A domain passes if a row with
--   tag_name=filter_type AND value=filter_value
-- exists.
CREATE FUNCTION tag2domain_get_tags_at_time_filtered(at_time timestamp, filter_type text, filter_value text)
RETURNS TABLE(
    domain_id bigint,
    domain_name character varying(100),
    tag_type text,
    tag_id int,
    value_id int,
    start_time timestamp with time zone,
    measured_at timestamp with time zone,
    end_time timestamp with time zone
) AS $$
SELECT v_unified_tags.* FROM v_unified_tags
  JOIN v_tag2domain_domain_filter USING (domain_id)
WHERE (
  (v_unified_tags.start_ts <= $1)
  AND ((v_unified_tags.end_ts > $1) OR (v_unified_tags.end_ts IS NULL))
  AND (v_tag2domain_domain_filter.start_ts AT TIME ZONE 'UTC' <= $1)
  AND ((v_tag2domain_domain_filter.end_ts AT TIME ZONE 'UTC' > $1) OR (v_tag2domain_domain_filter.end_ts IS NULL))
  AND (v_tag2domain_domain_filter.tag_name = $2)
  AND (v_tag2domain_domain_filter.value = $3)
)
$$ LANGUAGE SQL STABLE
SET search_path TO :t2d_schema
;

DROP FUNCTION IF EXISTS tag2domain_get_open_tags_domain;
-- function tag2domain_get_open_tags_domain(domain_name)
--
-- returns a table with the open tags for a single domain with name domain_name
-- BUG FIX: the function name was missing from the CREATE statement below
-- (it read "CREATE FUNCTION (domain_name ...)"), a syntax error that aborts
-- the whole script under ON_ERROR_STOP=1. The name matches the DROP above.
CREATE FUNCTION tag2domain_get_open_tags_domain(domain_name character varying (100))
RETURNS TABLE(
    domain_id bigint,
    domain_name character varying(100),
    tag_type text,
    tag_id int,
    value_id int,
    start_time timestamp with time zone,
    measured_at timestamp with time zone,
    end_time timestamp with time zone
) AS $$
SELECT v_unified_tags.* FROM v_unified_tags
WHERE
  (v_unified_tags.end_ts IS NULL)
  AND (v_unified_tags.domain_name = $1)
$$ LANGUAGE SQL
STABLE 162 | SET search_path TO :t2d_schema 163 | ; 164 | 165 | DROP FUNCTION IF EXISTS tag2domain_get_tags_at_time_domain; 166 | -- function tag2domain_get_open_tags_domain(at_time, domain_name) 167 | -- 168 | -- returns a table with the tags set at time at_time for a single domain with name domain_name 169 | CREATE FUNCTION tag2domain_get_tags_at_time_domain(at_time timestamp, domain_name character varying (100)) 170 | RETURNS TABLE( 171 | domain_id bigint, 172 | domain_name character varying(100), 173 | tag_type text, 174 | tag_id int, 175 | value_id int, 176 | start_time timestamp with time zone, 177 | measured_at timestamp with time zone, 178 | end_time timestamp with time zone 179 | ) AS $$ 180 | SELECT * FROM v_unified_tags 181 | WHERE ( 182 | (v_unified_tags.start_ts <= $1) 183 | AND ((v_unified_tags.end_ts > $1) OR (v_unified_tags.end_ts IS NULL)) 184 | AND (v_unified_tags.domain_name = $2) 185 | ) 186 | $$ LANGUAGE SQL STABLE 187 | SET search_path TO :t2d_schema 188 | ; 189 | 190 | DROP FUNCTION IF EXISTS tag2domain_get_all_tags_domain; 191 | -- function tag2domain_get_all_tags_domain(domain_name) 192 | -- 193 | -- returns a table with all tags that were ever set for a single domain with name domain_name 194 | CREATE FUNCTION tag2domain_get_all_tags_domain(domain_name character varying (100)) 195 | RETURNS TABLE( 196 | domain_id bigint, 197 | domain_name character varying(100), 198 | tag_type text, 199 | tag_id int, 200 | value_id int, 201 | start_time timestamp with time zone, 202 | measured_at timestamp with time zone, 203 | end_time timestamp with time zone 204 | ) AS $$ 205 | SELECT v_unified_tags.* FROM v_unified_tags 206 | WHERE ( 207 | (v_unified_tags.domain_name = $1) 208 | ) 209 | $$ LANGUAGE SQL 210 | SET search_path TO :t2d_schema 211 | ; -------------------------------------------------------------------------------- /scripts/db/create_intersection.sql: -------------------------------------------------------------------------------- 1 | SET 
statement_timeout = 0; 2 | SET lock_timeout = 0; 3 | SET idle_in_transaction_session_timeout = 0; 4 | SET client_encoding = 'UTF8'; 5 | SET standard_conforming_strings = on; 6 | SET check_function_bodies = false; 7 | SET xmloption = content; 8 | SET client_min_messages = warning; 9 | SET row_security = off; 10 | 11 | SET default_tablespace = ''; 12 | SET default_with_oids = false; 13 | 14 | CREATE SCHEMA IF NOT EXISTS :t2d_schema; 15 | SET search_path TO :t2d_schema; 16 | 17 | CREATE TABLE :t2d_intxn_table_name ( 18 | entity_id bigint, 19 | tag_id integer, 20 | start_date integer, 21 | end_date integer, 22 | taxonomy_id integer, 23 | value_id integer, 24 | measured_at timestamp with time zone, 25 | start_ts timestamp with time zone, 26 | end_ts timestamp with time zone, 27 | producer character varying(100) DEFAULT NULL::character varying, 28 | FOREIGN KEY (value_id) REFERENCES taxonomy_tag_val(id), 29 | FOREIGN KEY (taxonomy_id) REFERENCES taxonomy(id), 30 | FOREIGN KEY (tag_id) REFERENCES tags(tag_id), 31 | FOREIGN KEY (entity_id) REFERENCES :t2d_entity_table(:t2d_entity_id_column) 32 | ); 33 | 34 | COMMENT ON TABLE :t2d_intxn_table_name IS 'Intersection table that marks which tags and values where set on an entity in a given timespan'; 35 | COMMENT ON COLUMN :t2d_intxn_table_name.entity_id IS 'Foreign key in the table of entities'; 36 | COMMENT ON COLUMN :t2d_intxn_table_name.tag_id IS 'Foreign Key in table "tags"'; 37 | COMMENT ON COLUMN :t2d_intxn_table_name.start_date IS 'Startdate as int (YYYYMMDD)'; 38 | COMMENT ON COLUMN :t2d_intxn_table_name.end_date IS 'Enddate as int (YYYYMMDD)'; 39 | COMMENT ON COLUMN :t2d_intxn_table_name.taxonomy_id IS 'Foreign Key in table "taxonomy"'; 40 | COMMENT ON COLUMN :t2d_intxn_table_name.value_id IS 'Foreign Key in table "taxonomy_tag_val"'; 41 | COMMENT ON COLUMN :t2d_intxn_table_name.measured_at IS 'Last time the tag was measured'; 42 | COMMENT ON COLUMN :t2d_intxn_table_name.start_ts IS 'Start date/time of tag'; 43 | 
COMMENT ON COLUMN :t2d_intxn_table_name.end_ts IS 'End date/time of tag'; 44 | COMMENT ON COLUMN :t2d_intxn_table_name.producer IS 'Name of producer that measured the tag'; 45 | 46 | CREATE INDEX ON :t2d_intxn_table_name USING btree (entity_id); 47 | CREATE INDEX ON :t2d_intxn_table_name USING btree (end_ts DESC); 48 | CREATE INDEX ON :t2d_intxn_table_name USING btree (start_ts DESC); 49 | CREATE INDEX ON :t2d_intxn_table_name USING btree (tag_id); -------------------------------------------------------------------------------- /scripts/db/create_intersection_table.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | 5 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 6 | cd "$SCRIPT_DIR" 7 | 8 | function error_exit { 9 | >&2 echo "ERROR: $1" 10 | exit 1 11 | } 12 | 13 | if [ -z "$POSTGRES_USER" ]; then 14 | error_exit "POSTGRES_USER is not set" 15 | fi 16 | 17 | if [ -z "$POSTGRES_DB" ]; then 18 | error_exit "POSTGRES_DB is not set" 19 | fi 20 | 21 | if [ -z "$TAG2DOMAIN_SCHEMA" ]; then 22 | error_exit "TAG2DOMAIN_SCHEMA is not set" 23 | fi 24 | 25 | if [ -z "$TAG2DOMAIN_INTXN_TABLE_NAME" ]; then 26 | error_exit "TAG2DOMAIN_INTXN_TABLE_NAME is not set" 27 | fi 28 | 29 | if [ -z "$TAG2DOMAIN_ENTITY_TABLE" ]; then 30 | error_exit "TAG2DOMAIN_ENTITY_TABLE is not set" 31 | fi 32 | 33 | if [ -z "$TAG2DOMAIN_ENTITY_ID_COLUMN" ]; then 34 | error_exit "TAG2DOMAIN_ENTITY_ID_COLUMN is not set" 35 | fi 36 | 37 | if [ -z "$TAG2DOMAIN_TAG_TYPE" ]; then 38 | error_exit "TAG2DOMAIN_TAG_TYPE is not set" 39 | fi 40 | 41 | if [[ $TAG2DOMAIN_TAG_TYPE == *"."* ]]; then 42 | error_exit " can not contain a ." 
43 | fi 44 | 45 | function run_psql_script { 46 | psql \ 47 | -v ON_ERROR_STOP=1 \ 48 | -v t2d_schema="$TAG2DOMAIN_SCHEMA" \ 49 | -v t2d_intxn_table_name="$TAG2DOMAIN_INTXN_TABLE_NAME" \ 50 | -v t2d_entity_table="$TAG2DOMAIN_ENTITY_TABLE" \ 51 | -v t2d_entity_id_column="$TAG2DOMAIN_ENTITY_ID_COLUMN" \ 52 | -v t2d_tag_type="$TAG2DOMAIN_TAG_TYPE" \ 53 | --username "$POSTGRES_USER" \ 54 | --dbname "$POSTGRES_DB" \ 55 | -f "$1" 56 | } 57 | 58 | if [ -n "$POSTGRES_HOST" ]; then 59 | export PGHOST="$POSTGRES_HOST" 60 | fi 61 | 62 | if [ -n "$POSTGRES_PORT" ]; then 63 | export PGPORT="$POSTGRES_PORT" 64 | fi 65 | 66 | if [ -n "$POSTGRES_PASSWORD_FILE" ]; then 67 | export PGPASSFILE="$POSTGRES_PASSWORD_FILE" 68 | fi 69 | 70 | run_psql_script "create_intersection.sql" -------------------------------------------------------------------------------- /scripts/db/create_intxn_table_config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 5 | cd "$SCRIPT_DIR" 6 | 7 | function error_exit { 8 | >&2 echo "ERROR: $1" 9 | exit 1 10 | } 11 | 12 | if [ -z "$TAG2DOMAIN_TAG_TYPE" ]; then 13 | error_exit "TAG2DOMAIN_TAG_TYPE is not set" 14 | fi 15 | 16 | if [ -z "$TAG2DOMAIN_INTXN_TABLE_NAME" ]; then 17 | error_exit "TAG2DOMAIN_INTXN_TABLE_NAME is not set" 18 | fi 19 | 20 | if [[ $TAG2DOMAIN_TAG_TYPE == *"."* ]]; then 21 | error_exit " can not contain a ." 
22 | fi 23 | 24 | ESCAPED_TAG_TYPE="$(printf '%s\n' "$TAG2DOMAIN_TAG_TYPE" | sed -e 's/[\/&]/\\&/g')" 25 | ESCAPED_INTXN_TABLE_NAME="$(printf '%s\n' "$TAG2DOMAIN_INTXN_TABLE_NAME" | sed -e 's/[\/&]/\\&/g')" 26 | 27 | cat "intersection_table_config.template" \ 28 | | sed "s/{TAG2DOMAIN_INTXN_TABLE_NAME}/$ESCAPED_INTXN_TABLE_NAME/g" \ 29 | | sed "s/{TAG TYPE}/$ESCAPED_TAG_TYPE/g" -------------------------------------------------------------------------------- /scripts/db/intersection_table_config.template: -------------------------------------------------------------------------------- 1 | [db.intxn_table.{TAG TYPE}] 2 | table_name={TAG2DOMAIN_INTXN_TABLE_NAME} 3 | id=entity_id 4 | taxonomy_id=taxonomy_id 5 | tag_id=tag_id 6 | value_id=value_id 7 | measured_at=measured_at 8 | producer=producer 9 | start_date=start_date 10 | end_date=end_date 11 | start_ts=start_ts 12 | end_ts=end_ts 13 | -------------------------------------------------------------------------------- /scripts/util/taxonomy_postgresql_inserts.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import traceback 3 | from argparse import ArgumentParser 4 | import yaml 5 | from pprint import PrettyPrinter 6 | from collections import OrderedDict 7 | import json 8 | 9 | pprinter = PrettyPrinter() 10 | pprint = pprinter.pprint 11 | 12 | 13 | def error(s): 14 | sys.stderr.write(s) 15 | sys.exit(1) 16 | 17 | 18 | class InvalidSpec(Exception): 19 | pass 20 | 21 | 22 | def value_escape(value): 23 | if isinstance(value, str): 24 | s = value.replace("'", "''") 25 | return "'" + s + "'" 26 | elif value is None: 27 | return "NULL" 28 | else: 29 | return str(value) 30 | 31 | 32 | def check_extra_keys(d, path=None): 33 | if len(d) > 0: 34 | if path is not None: 35 | s = "extra keys in %s: %s" % (path, ', '.join(d.keys())) 36 | else: 37 | s = "extra keys: %s" % (', '.join(d.keys())) 38 | raise InvalidSpec(s) 39 | 40 | 41 | def gen_taxonomy_stmts(taxonomy, args): 42 | 
namespace = args.namespace 43 | 44 | # Fetch the parameters 45 | taxonomy_pars = OrderedDict() 46 | 47 | # Description 48 | taxonomy_pars["name"] = get_value(taxonomy, "name", type_=str) 49 | taxonomy_pars["description"] = \ 50 | get_value(taxonomy, "description", type_=str) 51 | taxonomy_pars["url"] = get_value(taxonomy, "url", type_=str) 52 | taxonomy_pars["is_actionable"] = \ 53 | get_value(taxonomy, "is_actionable", type_=float) 54 | 55 | # Flags 56 | flags = get_value(taxonomy, "flags", optional=True, default={}) 57 | taxonomy_pars["allows_auto_tags"] = get_value( 58 | flags, 59 | "allows_auto_tags", 60 | optional=True, 61 | default=False, 62 | type_=bool 63 | ) 64 | 65 | taxonomy_pars["allows_auto_values"] = get_value( 66 | flags, 67 | "allows_auto_values", 68 | optional=True, 69 | default=False, 70 | type_=bool 71 | ) 72 | 73 | taxonomy_pars["for_numbers"] = get_value( 74 | flags, 75 | "for_numbers", 76 | optional=True, 77 | default=True, 78 | type_=bool 79 | ) 80 | 81 | taxonomy_pars["for_domains"] = get_value( 82 | flags, 83 | "for_domains", 84 | optional=True, 85 | default=True, 86 | type_=bool 87 | ) 88 | 89 | taxonomy_pars["is_automatically_classifiable"] = get_value( 90 | flags, 91 | "is_automatically_classifiable", 92 | optional=True, 93 | default=True, 94 | type_=bool 95 | ) 96 | 97 | taxonomy_pars["is_stable"] = get_value( 98 | flags, 99 | "is_stable", 100 | optional=True, 101 | default=False, 102 | type_=bool 103 | ) 104 | 105 | check_extra_keys(flags, "taxonomy.flags") 106 | 107 | if len(flags) > 0: 108 | raise InvalidSpec( 109 | "unknown key(s) in flags: %s" % ', '.join(flags.keys()) 110 | ) 111 | 112 | stmts = """ 113 | DO $$ 114 | DECLARE taxonomy_id integer; 115 | DECLARE last_tag_id integer; 116 | BEGIN""".replace("\n ", "\n").split("\n") 117 | stmts.append( 118 | "INSERT INTO %(namespace)s.taxonomy (%(keys)s) VALUES(%(values)s) RETURNING id INTO taxonomy_id;" % ( 119 | { 120 | "namespace": namespace, 121 | "keys": 
def gen_tag_stmts(tag, args, path):
    """Build the INSERT statements for a single tag and its values.

    Parameters
    ----------
    tag : dict
        Tag specification (keys: name, description, optional extras and
        values). Recognized keys are consumed; leftovers raise InvalidSpec.
    args : argparse.Namespace
        Parsed CLI arguments; only ``namespace`` is used here.
    path : str
        Human-readable location of this tag in the spec, used in error
        messages (e.g. "taxonomy.tags[0]").

    Returns
    -------
    list of str
        SQL statement lines. They rely on the surrounding DO-block
        variables ``taxonomy_id`` (read) and ``last_tag_id`` (written via
        RETURNING ... INTO).
    """
    namespace = args.namespace

    # Fetch the parameters
    tag_pars = OrderedDict()

    # Description
    tag_pars["tag_name"] = get_value(tag, "name", type_=str)
    tag_pars["tag_description"] = get_value(tag, "description", type_=str)
    # extras is stored as JSON text; json.dumps doubles as the type cast
    tag_pars["extras"] = \
        get_value(tag, "extras", optional=True, default={}, type_=json.dumps)

    stmts = []
    stmts.append("-- tag %s" % tag_pars["tag_name"])
    # The bare identifier "taxonomy_id" in the VALUES list references the
    # DO-block variable; RETURNING ... INTO stores the new tag id in
    # last_tag_id so the per-value statements can reference it.
    stmts.append("INSERT INTO %(namespace)s.tags (%(keys)s) VALUES (%(values)s) RETURNING tag_id INTO last_tag_id;" % (
        {
            "namespace": namespace,
            "keys": ','.join(["taxonomy_id", ] + list(tag_pars.keys())),
            "values": ','.join(
                ["taxonomy_id", ] + list(map(value_escape, tag_pars.values()))
            )
        }
    ))

    values = get_value(tag, "values", default=[], optional=True)

    # All recognized keys have been consumed above; anything left is a typo.
    check_extra_keys(tag, "%s" % path)

    for i, _value in enumerate(values):
        stmts += gen_value_stmts(_value, args, "%s.values[%i]" % (path, i))

    return stmts
def get_value(
    d, key,
    optional=False,
    default=None,
    parent=None,
    type_=None,
    consume=True
):
    """Fetch (and by default remove) *key* from dict *d*.

    Parameters
    ----------
    d : dict
        Mapping to read from; the key is deleted when ``consume`` is True
        so leftover keys can later be reported by check_extra_keys().
    key : str
        Key to look up.
    optional : bool
        When True a missing key yields ``default`` instead of an error.
    default
        Value substituted for a missing optional key (also passed through
        ``type_`` if one is given).
    parent : str
        Optional path prefix used to build a helpful error message.
    type_ : callable
        Optional converter applied to the value (e.g. ``str``, ``bool``,
        ``json.dumps``).
    consume : bool
        Remove the key from ``d`` after reading it.

    Returns
    -------
    The (possibly converted) value.

    Raises
    ------
    InvalidSpec
        If a required key is missing or the conversion fails.
    """
    try:
        value = d[key]
        if consume:
            del d[key]
    except KeyError:
        if optional:
            value = default
        else:
            if parent:
                s = "could not find required key %s.%s" % (parent, key)
            else:
                s = "could not find required key %s" % key
            raise InvalidSpec(s)

    if type_ is not None:
        try:
            value = type_(value)
        except (ValueError, TypeError) as e:
            # TypeError is raised e.g. by json.dumps for unserializable
            # input; both failure modes mean the spec value is unusable.
            # Name the converter itself - the previous type(type_) always
            # printed "<class 'type'>" for class converters like str/bool.
            raise InvalidSpec(
                "could not convert value '%s' to type %s - %s" % (
                    value,
                    getattr(type_, "__name__", repr(type_)),
                    str(e)
                )
            )

    return value
default="tag2domain" 281 | ) 282 | 283 | args = parser.parse_args() 284 | 285 | try: 286 | run(args) 287 | except InvalidSpec as e: 288 | error("error in spec file - %s" % str(e)) 289 | except Exception as e: 290 | traceback.print_exc() 291 | error("fatal error: %s" % str(e)) 292 | -------------------------------------------------------------------------------- /static/cef_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/certtools/tag2domain/09386ac1f1935b06a7d136eed96973ac824e91be/static/cef_logo.png -------------------------------------------------------------------------------- /tag2domain_api/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # 3 | # Run me as: 4 | # 5 | # docker build -t tag2domain_api:0.4 . 6 | # 7 | # Then run the image via: 8 | # docker run -d -p 80:8001 -e PORT="8001" --name api -it tag2domain_api:0.4 9 | 10 | FROM tiangolo/uvicorn-gunicorn-fastapi:python3.7 11 | 12 | EXPOSE 8001 13 | 14 | COPY ./requirements.txt /app/requirements.txt 15 | RUN pip install -r /app/requirements.txt 16 | 17 | COPY ./app /app/tag2domain_api/app 18 | 19 | HEALTHCHECK --interval=30s --timeout=3s CMD curl -f http://localhost:8001/test/self-test || exit 1 20 | -------------------------------------------------------------------------------- /tag2domain_api/Dockerfile.msm2tag: -------------------------------------------------------------------------------- 1 | # 2 | # 3 | # Run me as: 4 | # 5 | # docker build -t tag2domain_api:0.4 . 
6 | # 7 | # Then run the image via: 8 | # docker run -d -p 80:8001 -e PORT="8001" --name api -it tag2domain_api:0.4 9 | 10 | FROM tiangolo/uvicorn-gunicorn-fastapi:python3.7 11 | 12 | EXPOSE 8001 13 | 14 | COPY ./tag2domain_api/requirements.txt /app/requirements.txt 15 | COPY ./py_tag2domain/requirements.txt /app/requirements.py_tag2domain.txt 16 | 17 | RUN pip install -r /app/requirements.txt && pip install -r /app/requirements.py_tag2domain.txt 18 | 19 | COPY ./tag2domain_api/app /app/tag2domain_api/app 20 | COPY ./py_tag2domain /app/py_tag2domain 21 | 22 | HEALTHCHECK --interval=30s --timeout=3s CMD curl -f http://localhost:8001/test/self-test || exit 1 23 | -------------------------------------------------------------------------------- /tag2domain_api/app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/certtools/tag2domain/09386ac1f1935b06a7d136eed96973ac824e91be/tag2domain_api/app/__init__.py -------------------------------------------------------------------------------- /tag2domain_api/app/api_v1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/certtools/tag2domain/09386ac1f1935b06a7d136eed96973ac824e91be/tag2domain_api/app/api_v1/__init__.py -------------------------------------------------------------------------------- /tag2domain_api/app/api_v1/api.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter 2 | 3 | from tag2domain_api.app.api_v1.endpoints import ( 4 | util, 5 | domains, 6 | stats, 7 | meta, 8 | bydomain, 9 | filters, 10 | msm2tag 11 | ) 12 | 13 | router = APIRouter() 14 | router.include_router( 15 | util.router, 16 | tags=["Util"] 17 | ) 18 | router.include_router( 19 | meta.router, 20 | prefix="/meta", 21 | tags=["List Taxonomy Information"] 22 | ) 23 | router.include_router( 24 | filters.router, 25 | prefix="/filters", 
26 | tags=["List Filter Information"] 27 | ) 28 | router.include_router( 29 | bydomain.router, 30 | prefix="/bydomain", 31 | tags=["List Domain Tags"] 32 | ) 33 | router.include_router( 34 | domains.router, 35 | prefix="/domains", 36 | tags=["List Domains"] 37 | ) 38 | router.include_router( 39 | msm2tag.router, 40 | prefix="/msm2tag", 41 | tags=["Add Tags"] 42 | ) 43 | router.include_router( 44 | stats.router, 45 | prefix="/stats", 46 | tags=["Statistics"] 47 | ) 48 | -------------------------------------------------------------------------------- /tag2domain_api/app/api_v1/endpoints/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/certtools/tag2domain/09386ac1f1935b06a7d136eed96973ac824e91be/tag2domain_api/app/api_v1/endpoints/__init__.py -------------------------------------------------------------------------------- /tag2domain_api/app/api_v1/endpoints/bydomain.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | 4 | from fastapi import APIRouter 5 | from typing import List 6 | 7 | from tag2domain_api.app.util.models import TagsOfDomainsResponse 8 | from tag2domain_api.app.util.config import config 9 | from tag2domain_api.app.util.db import execute_db, get_sql_base_table 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | router = APIRouter() 14 | 15 | 16 | @router.get( 17 | "/{domain}", 18 | response_model=List[TagsOfDomainsResponse], 19 | name="taxonomies_by_domain", 20 | summary="Show all open tags of a given domain at a single point in time" 21 | ) 22 | def get_tags_by_domain( 23 | domain: str, 24 | at_time: datetime.datetime = None, 25 | limit: int = config['default_limit'], 26 | offset: int = config['default_offset'] 27 | ): 28 | """ Returns all taxonomies and tags of a given {domain} 29 | 30 | **GET Parameters:** 31 | * domain ... the domain name to query (required) 32 | * at_time .. 
@router.get(
    "/{domain}/history",
    response_model=List[TagsOfDomainsResponse],
    # NOTE: this route previously reused name="taxonomies_by_domain",
    # which is already taken by GET /{domain}; duplicate route names break
    # reverse URL lookup (url_path_for), so this endpoint gets its own name.
    name="tag_history_by_domain",
    summary="Return the tag history of a domain"
)
def get_tag_history_by_domain(
    domain: str,
    limit: int = config['default_limit'],
    offset: int = config['default_offset']
):
    """ Returns the tag history of a single domain.

    **GET Parameters:**
    * domain ... the domain name to query (required)
    * limit .... how many entries should we return?
    * offset.... starting at {offset}

    **Output (JSON):**
    * domain name... string
    * domain_id ... ID of the domain
    * tag_id... int
    * tag_name ... name of the tag
    * value_id ... ID of the value associated with the tag
    * value ... value associated with the tag
    * taxonomy_id ... int
    * taxonomy_name ... name of the linked taxonomy
    """
    parameters = {
        "domain": domain,
        "limit": limit,
        "offset": offset
    }

    # tag2domain_get_all_tags_domain returns every (open and closed) tag
    # interval for the domain; values are joined in via LEFT JOIN because
    # a tag may carry no value.
    SQL = """
    SELECT
      taxonomy_id,
      taxonomy.name AS taxonomy_name,
      tag_table.tag_id,
      tag_name,
      value_id,
      value,
      start_time,
      measured_at,
      end_time
    FROM tag2domain_get_all_tags_domain(%(domain)s) AS tag_table
    JOIN tags USING (tag_id)
    JOIN taxonomy ON (tags.taxonomy_id = taxonomy.id)
    LEFT JOIN taxonomy_tag_val ON (tag_table.value_id = taxonomy_tag_val.id)
    ORDER BY domain_id, tag_id ASC
    LIMIT %(limit)s OFFSET %(offset)s
    """
    rows = execute_db(SQL, parameters, dict_=True)
    return rows
@router.get(
    "/values",
    response_model=List[str],
    name="Filter values",
    summary="Show all values found for a given filter"
)
def get_values(filter: str):
    """ Returns filter values found in the filter table.

    **Output (JSON list):**
    str - value
    """
    # DISTINCT ON (value) collapses duplicates; NULL values carry no
    # information for filtering and are excluded.
    sql = """
        SELECT DISTINCT ON (value)
            value
        FROM v_tag2domain_domain_filter
        WHERE (tag_name = %s) AND (value IS NOT NULL)
        ORDER BY value
        """
    rows = execute_db(sql, (filter, ))

    # Each row is a one-element tuple; unwrap to a flat list of strings.
    return [row[0] for row in rows]
    # POST endpoint that ingests a single tag measurement. Only registered
    # when ENABLE_MSM2TAG is set (see the module-level guard above).
    @router.post("/")
    async def msm2tag(
        msm: MeasurementModel = Body(
            ...,
            example={
                "version": "1",
                "tag_type": "intersection",
                "tagged_id": 3,
                "taxonomy": "colors",
                "producer": "test",
                "measured_at": "2020-12-22T12:35:32",
                "measurement_id": "test/12345",
                "tags": [
                    {
                        "tag": "rgb::blue"
                    },
                    {
                        "tag": "cmyk::black"
                    },
                ]
            }
        )
    ):
        # A fresh adapter per request: get_db() returns the shared
        # module-level connection, which the adapter wraps together with
        # the configured intersection-table mappings.
        _db_adapter = Psycopg2Adapter(
            get_db(),
            intxn_table_mappings,
            logger=logger
        )

        _msm2tags = MeasurementToTags(
            _db_adapter,
            logger=logger,
            max_measurement_age=max_measurement_age
        )
        try:
            logger.debug(msm.dict(exclude_unset=True))
            # skip_validation=True: the payload was already parsed against
            # MeasurementModel by FastAPI, so the schema check inside
            # handle_measurement is presumably redundant - TODO confirm.
            _msm2tags.handle_measurement(
                msm.dict(exclude_unset=True),
                skip_validation=True
            )
        except (InvalidMeasurementException, StaleMeasurementException) as e:
            # Client-side problems (malformed or out-of-date measurements)
            # map to HTTP 400; any other exception propagates as a 500.
            logger.info("error 400: " + str(e))
            logger.debug(traceback.format_exc())
            raise HTTPException(status_code=400, detail=str(e))

        return {"message": "OK"}
@router.get(
    "/version",
    name="Service version",
    summary="Return the version of the service"
)
async def get_version():
    # The service version string is maintained in util/config.py
    # ("version" key).
    return {"version": config["version"]}


@router.get(
    "/api-versions",
    name="API versions",
    summary="Return the API versions available"
)
async def get_api_versions():
    # Only v1 exists so far; extend this list when a new API version is
    # mounted in main.py.
    return [{"version": "v1"}, ]
@router.get(
    "/self-test",
    name="Self-test",
    summary="Run a self-test"
)
async def selftest():
    # Probe the DB with a trivial statement; any failure is mapped to 503
    # so orchestrators can treat the service as unhealthy (the Dockerfile
    # HEALTHCHECK curls this endpoint).
    try:
        execute_db("SELECT 1", ())
    except Exception:
        raise HTTPException(status_code=503, detail="failed DB execute")
    return {"message": "OK"}
if __name__ == "__main__":
    # basicConfig lives on the logging module, not on a Logger instance;
    # the previous logger.basicConfig(...) raised AttributeError whenever
    # this module was executed directly.
    logging.basicConfig(level=config['loglevel'])
    conn = get_db()
25 | 26 | """ 27 | 28 | config = dict() 29 | config.update(dict( 30 | version='0.8.4', 31 | # valid values: DEBUG, INFO, WARN, ERROR (same as default python logging) 32 | loglevel=os.getenv('LOG_LEVEL', 'INFO'), 33 | baseurl='http://localhost:' + os.getenv('PORT', '80'), 34 | default_limit=1000, 35 | default_offset=0, 36 | # DB stuff 37 | DBHOST=os.getenv('DBHOST', 'localhost'), 38 | DBPORT=os.getenv('DBPORT', '5432'), 39 | DATABASE=os.getenv('DB'), 40 | DBUSER=os.getenv('DBUSER'), 41 | DBPASSWORD=os.getenv('DBPASSWORD'), 42 | DBSSLMODE=os.getenv('DBSSLMODE', 'require'), 43 | DBTAG2DOMAIN_SCHEMA=os.getenv('DBTAG2DOMAIN_SCHEMA', 'tag2domain'), 44 | ENABLE_MSM2TAG=(os.getenv('ENABLE_MSM2TAG', False) == 'True'), 45 | MSM2TAG_MAX_MEASUREMENT_AGE=os.getenv('MSM2TAG_MAX_MEASUREMENT_AGE', None), 46 | MSM2TAG_DB_CONFIG=os.getenv('MSM2TAG_DB_CONFIG', None) 47 | )) 48 | 49 | if __name__ == "__main__": 50 | print(config) 51 | -------------------------------------------------------------------------------- /tag2domain_api/app/util/db.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | import re 4 | import copy 5 | import random 6 | 7 | import psycopg2 8 | import psycopg2.extras 9 | 10 | _db_conn = None 11 | _db_config = None 12 | _config = None 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | RE_FILTER = ( 17 | "^(?:(?P[A-Za-z\-0-9 ]+):)?(?:(?P[A-Za-z\-0-9:]+)::)?(?P[A-Za-z0-9-]+)?(?:=(?P[A-Za-z0-9. 
def execute_db(query, params=None, dict_=False, handle_failure=True):
    """
    Executes a DB statement and returns the results.

    Parameters:
    * query ... SQL statement with psycopg2-style placeholders
    * params ... dict or sequence of bind parameters (or None)
    * dict_ ... return rows as dicts (RealDictCursor) instead of tuples
    * handle_failure ... on a DB error, reconnect once and retry the
      statement; the retry runs with handle_failure=False so a second
      failure raises RuntimeError instead of looping forever.
    """

    # Random id used to correlate the debug log lines of one execution.
    _log_id = random.randint(0, 32768)
    try:
        if dict_:
            cursor = get_db_dict_cursor()
        else:
            cursor = get_db_cursor()

        # Log query and params as *arguments* of a fixed format string:
        # passing the raw query as the logging format string (as before)
        # made the logging module %-format the SQL placeholders
        # (e.g. %(limit)s) against the bind parameters, producing
        # formatting errors on every debug-level call.
        logger.debug("%s -- params: %r", query, params)

        logger.debug(str(_log_id) + " - executing query...")
        start = time.time()
        cursor.execute(query, params)
        logger.debug(str(_log_id) + " - done - %f s", time.time() - start)
        logger.debug(str(_log_id) + " - fetching result...")
        start = time.time()
        rows = cursor.fetchall()
        get_db().commit()  # immediately end transaction
        logger.debug(str(_log_id) + " - done - %f s", time.time() - start)
    except (psycopg2.Error, RuntimeError) as e:
        if handle_failure:
            logger.debug("failed DB stmt (%s) - reconnecting", str(e))
            time.sleep(2)
            connect_db()
            rows = execute_db(query, params, dict_=dict_, handle_failure=False)
        else:
            # Chain the original exception so the root cause stays visible.
            raise RuntimeError(
                "could not execute statement - %s" % str(e)
            ) from e
    return rows
89 | :rtype: psycopg2 connection""" 90 | global _db_conn 91 | global _db_config 92 | global _config 93 | if _db_conn is not None: 94 | logger.debug("closing previous DB connection") 95 | try: 96 | _db_conn.rollback() 97 | _db_conn.close() 98 | except psycopg2.Error as e: 99 | logger.debug( 100 | "encountered error in close before connect: %s", str(e.pgerror) 101 | ) 102 | _db_conn = None 103 | 104 | if config is None: 105 | if _db_config is None: 106 | raise ValueError("No config given") 107 | logger.debug("reusing previous DB config") 108 | db_config = _db_config 109 | else: 110 | db_config = dict( 111 | dbname=config['DATABASE'], 112 | user=config['DBUSER'], 113 | password=config['DBPASSWORD'], 114 | host=config['DBHOST'], 115 | port=config['DBPORT'], 116 | application_name="tag2domain_api", 117 | sslmode=config['DBSSLMODE'], 118 | options='-c search_path=%s' % config['DBTAG2DOMAIN_SCHEMA'] 119 | ) 120 | _config = copy.deepcopy(config) 121 | 122 | def filter(key, value): 123 | if key in ["password", ]: 124 | return "" 125 | else: 126 | return value 127 | logger.debug( 128 | "DB config: " + str({ 129 | key: filter(key, value) 130 | for key, value in db_config.items() 131 | }) 132 | ) 133 | try: 134 | conn = psycopg2.connect(**db_config) 135 | except Exception as ex: 136 | time.sleep(2) 137 | raise RuntimeError( 138 | "could not connect to the DB. 
def disconnect_db():
    """Close the module-level DB connection, if any.

    Safe to call repeatedly (or before connect_db()): a missing connection
    is ignored instead of raising AttributeError on ``None.close()``, as
    the previous unconditional close did.
    """
    global _db_conn
    if _db_conn is not None:
        _db_conn.close()
        _db_conn = None
s, params 211 | elif domain is not None: 212 | params = {'__domain': domain} 213 | if at_time is not None: 214 | params['__at_time'] = at_time 215 | s = ( 216 | 'tag2domain_get_tags_at_time_domain' 217 | '(%(__at_time)s, %(__domain)s)' 218 | ) 219 | else: 220 | s = 'tag2domain_get_open_tags_domain(%(__domain)s)' 221 | return s, params 222 | else: 223 | if at_time is not None: 224 | params = {'__at_time': at_time} 225 | return 'tag2domain_get_tags_at_time(%(__at_time)s)', params 226 | else: 227 | return 'tag2domain_get_open_tags()', {} 228 | -------------------------------------------------------------------------------- /tag2domain_api/app/util/logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from tag2domain_api.app.util.config import config 4 | 5 | 6 | def setup(): 7 | logging.basicConfig( 8 | level=config['loglevel'], 9 | format='[%(asctime)s] [%(process)d] [%(levelname)s] %(message)s', 10 | datefmt='%Y-%m-%d %H:%M:%S %z' 11 | ) 12 | -------------------------------------------------------------------------------- /tag2domain_api/app/util/models.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | from typing import List, Union 3 | from enum import Enum 4 | from datetime import datetime 5 | 6 | 7 | class ErrorMessage(BaseModel): 8 | detail: str 9 | 10 | 11 | class VersionComparisonOperatorParameter(str, Enum): 12 | equal = "=" 13 | lessthan = "<" 14 | lessthanequal = "<=" 15 | greaterthan = ">" 16 | greaterthanequal = ">=" 17 | 18 | 19 | class TagsResponse(BaseModel): 20 | tag_id: int 21 | tag_name: str 22 | tag_description: str = None 23 | taxonomy_id: int = None 24 | extras: dict = None 25 | 26 | 27 | class ValuesResponse(BaseModel): 28 | value_id: int 29 | value: str 30 | 31 | 32 | class DomainsResponse(BaseModel): 33 | domain_id: int 34 | domain_name: str 35 | tag_type: str 36 | value: Union[str, None] 37 | start_time: 
datetime 38 | measured_at: Union[datetime, None] 39 | end_time: datetime = None 40 | 41 | 42 | class DomainsResponseWithVersion(BaseModel): 43 | domain_id: int 44 | domain_name: str 45 | version: Union[str, None] 46 | start_time: datetime 47 | measured_at: Union[datetime, None] 48 | end_time: datetime = None 49 | 50 | 51 | class DomainTagResponse(BaseModel): 52 | tag_id: int 53 | tag_name: str 54 | start_time: datetime 55 | measured_at: Union[datetime, None] 56 | end_time: datetime = None 57 | 58 | 59 | class DomainsWithTagsResponse(BaseModel): 60 | domain_id: int 61 | domain_name: str 62 | tags: List[DomainTagResponse] 63 | 64 | 65 | class TaxonomiesResponse(BaseModel): 66 | id: int 67 | name: str 68 | description: str = None 69 | is_actionable: float = None 70 | is_automatically_classifiable: float = None 71 | is_stable: float = None 72 | for_numbers: bool = None 73 | for_domains: bool = None 74 | url: str = None 75 | 76 | 77 | class TagsOfDomainsResponse(BaseModel): 78 | tag_id: int 79 | tag_name: str 80 | value_id: Union[int, None] 81 | value: Union[str, None] 82 | taxonomy_id: int 83 | taxonomy_name: str 84 | start_time: datetime 85 | measured_at: Union[datetime, None] 86 | end_time: datetime = None 87 | 88 | 89 | class StatsTaxonomiesResponse(BaseModel): 90 | taxonomy_name: str 91 | count: int 92 | 93 | 94 | class StatsTagsResponse(BaseModel): 95 | tag_name: str 96 | count: int 97 | 98 | 99 | class StatsCategoriesResponse(BaseModel): 100 | category: str 101 | count: int 102 | 103 | 104 | class StatsValuesResponse(BaseModel): 105 | value: Union[str, None] 106 | count: int 107 | 108 | 109 | class TagDetailsResponse(BaseModel): 110 | name: str 111 | description: str 112 | category: Union[str, None] 113 | extras: Union[dict, None] 114 | 115 | 116 | class TaxonomyFlagsResponse(BaseModel): 117 | is_actionable: Union[float, None] 118 | is_automatically_classifiable: Union[bool, None] 119 | is_stable: Union[bool, None] 120 | for_numbers: bool 121 | for_domains: bool 
122 | allows_auto_tags: bool 123 | allows_auto_values: bool 124 | 125 | 126 | class TaxonomyDetailsResponse(BaseModel): 127 | name: str 128 | description: str 129 | url: Union[str, None] 130 | flags: TaxonomyFlagsResponse 131 | 132 | 133 | class ValuesAggregateResponse(BaseModel): 134 | count: int 135 | 136 | 137 | class TagInfoResponse(BaseModel): 138 | tag: TagDetailsResponse 139 | taxonomy: TaxonomyDetailsResponse 140 | values: ValuesAggregateResponse 141 | -------------------------------------------------------------------------------- /tag2domain_api/benchmark/functions.js: -------------------------------------------------------------------------------- 1 | function printStatus (requestParams, response, context, ee, next) { 2 | console.log(`${response.request.uri.path}: ${response.statusCode}`); 3 | return next(); 4 | } 5 | 6 | module.exports = { 7 | printStatus: printStatus 8 | } 9 | -------------------------------------------------------------------------------- /tag2domain_api/benchmark/test.yaml: -------------------------------------------------------------------------------- 1 | config: 2 | target: "http://dimi.labs.nic.at:8001" 3 | processor: "functions.js" 4 | phases: 5 | - duration: 10 6 | arrivalRate: 1 7 | name: Warm up 8 | - duration: 60 9 | arrivalRate: 5 10 | rampTo: 50 11 | name: Ramp up load 12 | payload: 13 | # Load search keywords from an external CSV file and make them available 14 | # to virtual user scenarios as variable "keywords": 15 | path: "domains.csv" 16 | fields: 17 | - "domain_name" 18 | scenarios: 19 | # We define one scenario: 20 | - name: "Lookup domain" 21 | flow: 22 | # Get the details of the product: 23 | - get: 24 | url: "/api/v1/bydomain/{{ domain_name }}?limit=1000" 25 | #afterResponse: "printStatus" # uncomment to see URI and status code in log 26 | -------------------------------------------------------------------------------- /tag2domain_api/requirements.txt: 
-------------------------------------------------------------------------------- 1 | attrs==19.3.0 2 | click==7.1.2 3 | envelope==1.2.4 4 | fastapi==0.65.2 5 | h11==0.9.0 6 | httptools==0.1.1 7 | importlib-metadata==1.7.0 8 | jsonpickle>=1.4.3 9 | more-itertools==8.4.0 10 | packaging==20.4 11 | pluggy==0.13.1 12 | psycopg2-binary>=2.8 13 | py==1.10.0 14 | pydantic==1.6.2 15 | pyparsing==2.4.7 16 | pytest==6.0.0 17 | python-gnupg==0.4.6 18 | python-magic==0.4.18 19 | six==1.15.0 20 | starlette==0.27.0 21 | uvicorn==0.11.7 22 | uvloop==0.14.0 23 | wcwidth==0.2.5 24 | websockets==9.1 25 | zipp==3.1.0 26 | python-dotenv==0.14.0 27 | dnspython==1.16.0 28 | filelock==3.0.12 29 | idna==2.10 30 | iniconfig==1.0.0 31 | py3-validate-email==0.2.9 32 | toml==0.10.1 33 | orjson>=3.4.6 34 | pyyaml>=5.4.1 35 | -------------------------------------------------------------------------------- /tag2domain_api/secrets.env.example: -------------------------------------------------------------------------------- 1 | DBUSER= 2 | DBPASSWORD= -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.environ["ENABLE_MSM2TAG"] = "True" 4 | os.environ["MSM2TAG_DB_CONFIG"] = os.path.join( 5 | os.path.dirname(os.path.abspath(__file__)), 6 | "config", 7 | "db.cfg" 8 | ) 9 | -------------------------------------------------------------------------------- /tests/config/db.cfg.ci: -------------------------------------------------------------------------------- 1 | [db] 2 | host=postgres 3 | user=test_pg_user 4 | port=5432 5 | password={DB_PWD} 6 | schema=tag2domain 7 | dbname=tag2domain_mock_db 8 | sslmode=prefer 9 | application_name=py_tag2domain_test 10 | 11 | [db.intxn_table.delegation] 12 | table_name=delegation_tags 13 | id=delegation_id 14 | taxonomy_id=taxonomy_id 15 | tag_id=tag_id 16 | value_id=value_id 17 | measured_at=measured_at 18 | 
producer=producer 19 | start_date=start_date 20 | end_date=end_date 21 | start_ts=start_ts 22 | end_ts=end_ts 23 | 24 | [db.intxn_table.domain] 25 | table_name=domain_tags 26 | id=domain_id 27 | taxonomy_id=taxonomy_id 28 | tag_id=tag_id 29 | value_id=value_id 30 | measured_at=measured_at 31 | producer=producer 32 | start_date=start_date 33 | end_date=end_date 34 | start_ts=start_ts 35 | end_ts=end_ts 36 | 37 | [db.intxn_table.intersection] 38 | table_name=intersections 39 | id=intxn_id 40 | taxonomy_id=taxonomie_id 41 | tag_id=pickerl_id 42 | value_id=beschriftung_id 43 | measured_at=gemessen_um 44 | producer=erzeuger 45 | start_date=datum_start 46 | end_date=datum_ende 47 | start_ts=zeitstempel_start 48 | end_ts=zeitstempel_ende 49 | -------------------------------------------------------------------------------- /tests/config/db.cfg.example: -------------------------------------------------------------------------------- 1 | [db] 2 | host= 3 | user= 4 | port= 5 | password= 6 | schema= 7 | dbname= 8 | sslmode=prefer 9 | application_name=py_tag2domain_test 10 | 11 | [db.intxn_table.delegation] 12 | table_name=delegation_tags 13 | id=delegation_id 14 | taxonomy_id=taxonomy_id 15 | tag_id=tag_id 16 | value_id=value_id 17 | measured_at=measured_at 18 | producer=producer 19 | start_date=start_date 20 | end_date=end_date 21 | start_ts=start_ts 22 | end_ts=end_ts 23 | 24 | [db.intxn_table.domain] 25 | table_name=domain_tags 26 | id=domain_id 27 | taxonomy_id=taxonomy_id 28 | tag_id=tag_id 29 | value_id=value_id 30 | measured_at=measured_at 31 | producer=producer 32 | start_date=start_date 33 | end_date=end_date 34 | start_ts=start_ts 35 | end_ts=end_ts 36 | 37 | [db.intxn_table.intersection] 38 | table_name=intersections 39 | id=intxn_id 40 | taxonomy_id=taxonomie_id 41 | tag_id=pickerl_id 42 | value_id=beschriftung_id 43 | measured_at=gemessen_um 44 | producer=erzeuger 45 | start_date=datum_start 46 | end_date=datum_ende 47 | start_ts=zeitstempel_start 48 | 
end_ts=zeitstempel_ende 49 | -------------------------------------------------------------------------------- /tests/db_mock_data/50-init-tag2domain-db.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 5 | cd "$SCRIPT_DIR" 6 | 7 | if [ -z "$POSTGRES_USER" ]; then 8 | error_exit "POSTGRES_USER is not set" 9 | fi 10 | 11 | if [ -z "$POSTGRES_DB" ]; then 12 | error_exit "POSTGRES_DB is not set" 13 | fi 14 | 15 | if [ -z "$TAG2DOMAIN_SCHEMA" ]; then 16 | error_exit "TAG2DOMAIN_SCHEMA is not set" 17 | fi 18 | 19 | function run_psql_script { 20 | echo "RUNNING PSQL SCRIPT $(pwd)/$1" 21 | psql \ 22 | -v ON_ERROR_STOP=1 \ 23 | -v t2d_schema="$TAG2DOMAIN_SCHEMA" \ 24 | --username "$POSTGRES_USER" \ 25 | --dbname "$POSTGRES_DB" \ 26 | -f "$1" 27 | } 28 | 29 | if [ -n "$POSTGRES_HOST" ]; then 30 | export PGHOST="$POSTGRES_HOST" 31 | fi 32 | 33 | if [ -n "$POSTGRES_PORT" ]; then 34 | export PGPORT="$POSTGRES_PORT" 35 | fi 36 | 37 | if [ -n "$POSTGRES_PASSWORD_FILE" ]; then 38 | export PGPASSFILE="$POSTGRES_PASSWORD_FILE" 39 | fi 40 | 41 | run_psql_script basic/tag2domain_db_test_schema.sql 42 | run_psql_script basic/tag2domain_db_test_glue_views.sql 43 | run_psql_script basic/tag2domain_db_test_data.sql 44 | -------------------------------------------------------------------------------- /tests/db_mock_data/basic/tag2domain_db_test_data.sql: -------------------------------------------------------------------------------- 1 | CREATE SCHEMA IF NOT EXISTS tag2domain; 2 | SET search_path TO tag2domain; 3 | 4 | INSERT INTO public.registrars (registrar_name) VALUES ('registrar_1'); 5 | INSERT INTO public.registrars (registrar_name) VALUES ('registrar_2'); 6 | INSERT INTO public.registrars (registrar_name) VALUES ('registrar_3'); 7 | 8 | INSERT INTO public.domains (domain_name) VALUES ('test1.at'); 9 | INSERT INTO public.domains 
(domain_name) VALUES ('test2.at'); 10 | INSERT INTO public.domains (domain_name) VALUES ('test3.at'); 11 | INSERT INTO public.domains (domain_name) VALUES ('test4.at'); 12 | INSERT INTO public.domains (domain_name) VALUES ('test5.at'); 13 | 14 | INSERT INTO public.delegations (domain_id, registrar_id, create_ts, purge_ts) VALUES (1, 1, '2020-01-17 12:53:21', NULL); 15 | INSERT INTO public.delegations (domain_id, registrar_id, create_ts, purge_ts) VALUES (2, 1, '2020-02-17 12:53:21', '2020-06-30 04:00:00'); 16 | INSERT INTO public.delegations (domain_id, registrar_id, create_ts, purge_ts) VALUES (3, 2, '2020-05-01 14:00:00', NULL); 17 | INSERT INTO public.delegations (domain_id, registrar_id, create_ts, purge_ts) VALUES (4, 2, '2020-01-17 12:53:21', NULL); 18 | INSERT INTO public.delegations (domain_id, registrar_id, create_ts, purge_ts) VALUES (5, 3, '2020-08-20 23:50:00', NULL); 19 | 20 | INSERT INTO taxonomy (name, description, is_actionable, is_automatically_classifiable, is_stable, for_numbers, for_domains, url, allows_auto_tags, allows_auto_values) VALUES ('tax_test1', 'test taxonomie 1', 1, true, false, true, true, 'test.at/test_taxonomie_1', false, false); 21 | INSERT INTO taxonomy (name, description, is_actionable, is_automatically_classifiable, is_stable, for_numbers, for_domains, url, allows_auto_tags, allows_auto_values) VALUES ('tax_test2', 'test taxonomie 2', 1, true, false, true, true, 'test.at/test_taxonomie_2', true, false); 22 | INSERT INTO taxonomy (name, description, is_actionable, is_automatically_classifiable, is_stable, for_numbers, for_domains, url, allows_auto_tags, allows_auto_values) VALUES ('tax_test3', 'test taxonomie 3', 0.5, true, false, true, true, 'test.at/test_taxonomie_3', false, true); 23 | INSERT INTO taxonomy (name, description, is_actionable, is_automatically_classifiable, is_stable, for_numbers, for_domains, url, allows_auto_tags, allows_auto_values) VALUES ('tax_test4', 'test taxonomie 4', NULL, true, false, true, true, 
'test.at/test_taxonomie_4', true, true); 24 | 25 | INSERT INTO tags (tag_name, tag_description, taxonomy_id, extras) VALUES ('test_tag_1_tax_1', 'test tag 1 for tax 1', 1, '{}'); 26 | INSERT INTO tags (tag_name, tag_description, taxonomy_id, extras) VALUES ('test_tag_2_tax_1', 'test tag 2 for tax 1', 1, '{}'); 27 | INSERT INTO tags (tag_name, tag_description, taxonomy_id, extras) VALUES ('test_tag_3_tax_1', 'test tag 3 for tax 1', 1, '{}'); 28 | INSERT INTO tags (tag_name, tag_description, taxonomy_id, extras) VALUES ('test_tag_1_tax_3', 'test tag 1 for tax 3', 3, '{}'); 29 | 30 | INSERT INTO taxonomy_tag_val (value, tag_id) VALUES ('value_1_tag_1', 1); 31 | INSERT INTO taxonomy_tag_val (value, tag_id) VALUES ('value_2_tag_1', 1); 32 | INSERT INTO taxonomy_tag_val (value, tag_id) VALUES ('value_1_tag_4', 4); 33 | 34 | INSERT INTO domain_tags (domain_id, tag_id, start_date, end_date, taxonomy_id, value_id, measured_at, start_ts, end_ts, producer) 35 | VALUES (1, 1, '20200317', null, 1, null, to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), null, 'test_producer1'); 36 | INSERT INTO domain_tags (domain_id, tag_id, start_date, end_date, taxonomy_id, value_id, measured_at, start_ts, end_ts, producer) 37 | VALUES (2, 1, '20200317', null, 1, 1, to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), null, null); 38 | INSERT INTO domain_tags (domain_id, tag_id, start_date, end_date, taxonomy_id, value_id, measured_at, start_ts, end_ts, producer) 39 | VALUES (1, 2, '20200425', null, 1, null, to_timestamp('2020-06-30 20:51:36', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-04-25 18:21:00', 'YYYY-MM-DD HH24:MI:SS'), null, 'test_producer1'); 40 | INSERT INTO domain_tags (domain_id, tag_id, start_date, end_date, taxonomy_id, value_id, measured_at, start_ts, end_ts, producer) 41 | VALUES (1, 3, '20200425', '20200710', 1, null, 
to_timestamp('2020-07-10 14:20:00', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-04-25 18:21:00', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-07-10 14:20:00', 'YYYY-MM-DD HH24:MI:SS'), 'test_producer3'); 42 | INSERT INTO domain_tags (domain_id, tag_id, start_date, end_date, taxonomy_id, value_id, measured_at, start_ts, end_ts, producer) 43 | VALUES (1, 4, '20200425', null, 3, null, to_timestamp('2020-06-30 20:51:36', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-04-25 18:21:00', 'YYYY-MM-DD HH24:MI:SS'), null, 'test_producer3'); 44 | 45 | INSERT INTO delegation_tags (delegation_id, tag_id, start_date, end_date, taxonomy_id, value_id, measured_at, start_ts, end_ts, producer) 46 | VALUES (1, 1, '20200317', null, 1, null, to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), null, 'test_producer1'); 47 | INSERT INTO delegation_tags (delegation_id, tag_id, start_date, end_date, taxonomy_id, value_id, measured_at, start_ts, end_ts, producer) 48 | VALUES (2, 1, '20200317', null, 1, 1, to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), null, null); 49 | INSERT INTO delegation_tags (delegation_id, tag_id, start_date, end_date, taxonomy_id, value_id, measured_at, start_ts, end_ts, producer) 50 | VALUES (1, 2, '20200425', null, 1, null, to_timestamp('2020-06-30 20:51:36', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-04-25 18:21:00', 'YYYY-MM-DD HH24:MI:SS'), null, 'test_producer1'); 51 | INSERT INTO delegation_tags (delegation_id, tag_id, start_date, end_date, taxonomy_id, value_id, measured_at, start_ts, end_ts, producer) 52 | VALUES (1, 3, '20200425', '20200710', 1, null, to_timestamp('2020-07-10 14:20:00', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-04-25 18:21:00', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-07-10 14:20:00', 'YYYY-MM-DD HH24:MI:SS'), 'test_producer3'); 53 | INSERT INTO delegation_tags (delegation_id, tag_id, 
start_date, end_date, taxonomy_id, value_id, measured_at, start_ts, end_ts, producer) 54 | VALUES (1, 4, '20200425', null, 3, null, to_timestamp('2020-06-30 20:51:36', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-04-25 18:21:00', 'YYYY-MM-DD HH24:MI:SS'), null, 'test_producer3'); 55 | 56 | INSERT INTO intersections (intxn_id, pickerl_id, datum_start, datum_ende, taxonomie_id, beschriftung_id, gemessen_um, zeitstempel_start, zeitstempel_ende, erzeuger) 57 | VALUES (1, 1, '20200317', null, 1, null, to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), null, 'test_producer1'); 58 | INSERT INTO intersections (intxn_id, pickerl_id, datum_start, datum_ende, taxonomie_id, beschriftung_id, gemessen_um, zeitstempel_start, zeitstempel_ende, erzeuger) 59 | VALUES (2, 1, '20200317', null, 1, 1, to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), null, null); 60 | INSERT INTO intersections (intxn_id, pickerl_id, datum_start, datum_ende, taxonomie_id, beschriftung_id, gemessen_um, zeitstempel_start, zeitstempel_ende, erzeuger) 61 | VALUES (1, 2, '20200425', null, 1, null, to_timestamp('2020-06-30 20:51:36', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-04-25 18:21:00', 'YYYY-MM-DD HH24:MI:SS'), null, 'test_producer1'); 62 | INSERT INTO intersections (intxn_id, pickerl_id, datum_start, datum_ende, taxonomie_id, beschriftung_id, gemessen_um, zeitstempel_start, zeitstempel_ende, erzeuger) 63 | VALUES (1, 3, '20200425', '20200710', 1, null, to_timestamp('2020-07-10 14:20:00', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-04-25 18:21:00', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-07-10 14:20:00', 'YYYY-MM-DD HH24:MI:SS'), 'test_producer3'); 64 | INSERT INTO intersections (intxn_id, pickerl_id, datum_start, datum_ende, taxonomie_id, beschriftung_id, gemessen_um, zeitstempel_start, zeitstempel_ende, erzeuger) 65 | VALUES (1, 4, '20200425', null, 
3, null, to_timestamp('2020-06-30 20:51:36', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-04-25 18:21:00', 'YYYY-MM-DD HH24:MI:SS'), null, 'test_producer3'); 66 | -------------------------------------------------------------------------------- /tests/db_mock_data/basic/tag2domain_db_test_glue_views.sql: -------------------------------------------------------------------------------- 1 | CREATE SCHEMA IF NOT EXISTS tag2domain; 2 | SET search_path TO tag2domain; 3 | 4 | CREATE OR REPLACE VIEW v_unified_tags 5 | AS 6 | SELECT 7 | domains.domain_id, 8 | domains.domain_name, 9 | 'delegation' AS tag_type, 10 | delegation_tags.tag_id, 11 | delegation_tags.value_id, 12 | delegation_tags.start_ts AS start_ts, 13 | delegation_tags.measured_at AS measured_at, 14 | delegation_tags.end_ts AS end_ts 15 | FROM public.domains AS domains 16 | JOIN public.delegations AS delegations USING (domain_id) 17 | JOIN delegation_tags USING (delegation_id) 18 | UNION ALL 19 | SELECT 20 | domains.domain_id, 21 | domains.domain_name, 22 | 'domain' AS tag_type, 23 | domain_tags.tag_id, 24 | domain_tags.value_id, 25 | domain_tags.start_ts AS start_ts, 26 | domain_tags.measured_at AS measured_at, 27 | domain_tags.end_ts AS end_ts 28 | FROM public.domains AS domains 29 | JOIN domain_tags USING (domain_id) 30 | UNION ALL 31 | SELECT 32 | domains.domain_id, 33 | domains.domain_name, 34 | 'intersection' AS tag_type, 35 | intersections.pickerl_id AS tag_id, 36 | intersections.beschriftung_id AS value_id, 37 | intersections.zeitstempel_start AS start_ts, 38 | intersections.gemessen_um AS measured_at, 39 | intersections.zeitstempel_ende AS end_ts 40 | FROM intersections 41 | JOIN public.domains AS domains ON (domains.domain_id = intersections.intxn_id) 42 | ; 43 | 44 | CREATE OR REPLACE VIEW v_tag2domain_domain_filter 45 | AS 46 | -- registrars 47 | SELECT 48 | domains.domain_id AS domain_id, 49 | 'registrar-id' AS tag_name, 50 | delegations.create_ts AS start_ts, 51 | delegations.purge_ts AS end_ts, 52 
| registrars.registrar_id::text AS value 53 | FROM public.registrars AS registrars 54 | JOIN public.delegations AS delegations USING(registrar_id) 55 | JOIN public.domains AS domains USING(domain_id) 56 | ; 57 | 58 | DROP FUNCTION IF EXISTS tag2domain_get_open_tags; 59 | -- function tag2domain_get_open_tags() 60 | -- returns a table with all currently open tags 61 | CREATE FUNCTION tag2domain_get_open_tags() 62 | RETURNS TABLE( 63 | domain_id bigint, 64 | domain_name character varying(100), 65 | tag_type text, 66 | tag_id int, 67 | value_id int, 68 | start_time timestamp with time zone, 69 | measured_at timestamp with time zone, 70 | end_time timestamp with time zone 71 | ) AS $$ 72 | SELECT * FROM v_unified_tags 73 | WHERE (v_unified_tags.end_ts IS NULL) 74 | $$ LANGUAGE SQL STABLE 75 | SET search_path TO tag2domain 76 | ; 77 | 78 | DROP FUNCTION IF EXISTS tag2domain_get_open_tags_filtered; 79 | -- function tag2domain_get_open_tags_filtered(filter_type, filter_value) 80 | -- 81 | -- returns a table with all currently open tags for all domains that filtered 82 | -- through the v_tag2domain_domain_filter filter tables. 83 | -- 84 | -- filter_type references the tag_name column and filter_value the value column of the 85 | -- v_tag2domain_domain_filter table. A domain passes if a row with 86 | -- tag_name=filter_type AND value=filter_value 87 | -- exists. 
88 | CREATE FUNCTION tag2domain_get_open_tags_filtered(filter_type text, filter_value text) 89 | RETURNS TABLE( 90 | domain_id bigint, 91 | domain_name character varying(100), 92 | tag_type text, 93 | tag_id int, 94 | value_id int, 95 | start_time timestamp with time zone, 96 | measured_at timestamp with time zone, 97 | end_time timestamp with time zone 98 | ) AS $$ 99 | SELECT v_unified_tags.* FROM v_unified_tags 100 | JOIN v_tag2domain_domain_filter USING (domain_id) 101 | WHERE ( 102 | (v_unified_tags.end_ts IS NULL) 103 | AND (v_tag2domain_domain_filter.end_ts IS NULL) 104 | AND (v_tag2domain_domain_filter.tag_name = $1) 105 | AND (v_tag2domain_domain_filter.value = $2) 106 | ) 107 | $$ LANGUAGE SQL STABLE 108 | SET search_path TO tag2domain 109 | ; 110 | 111 | DROP FUNCTION IF EXISTS tag2domain_get_tags_at_time; 112 | -- function tag2domain_get_tags_at_time(at_time) 113 | -- Returns all tags that were open at time at_time. 114 | CREATE FUNCTION tag2domain_get_tags_at_time(at_time timestamp) 115 | RETURNS TABLE( 116 | domain_id bigint, 117 | domain_name character varying(100), 118 | tag_type text, 119 | tag_id int, 120 | value_id int, 121 | start_time timestamp with time zone, 122 | measured_at timestamp with time zone, 123 | end_time timestamp with time zone 124 | ) AS $$ 125 | SELECT v_unified_tags.* FROM v_unified_tags 126 | WHERE ( 127 | (v_unified_tags.start_ts <= $1) 128 | AND ((v_unified_tags.end_ts > $1) OR (v_unified_tags.end_ts IS NULL)) 129 | ) 130 | $$ LANGUAGE SQL STABLE 131 | SET search_path TO tag2domain 132 | ; 133 | 134 | DROP FUNCTION IF EXISTS tag2domain_get_tags_at_time_filtered; 135 | -- function tag2domain_get_tags_at_time_filtered(at_time, filter_type, filter_value) 136 | -- 137 | -- returns a table with all tags that were open at time at_time for all domains that 138 | -- filtered through the v_tag2domain_domain_filter filter tables. 
139 | -- 140 | -- filter_type references the tag_name column and filter_value the value column of the 141 | -- v_tag2domain_domain_filter table. A domain passes if a row with 142 | -- tag_name=filter_type AND value=filter_value 143 | -- exists. 144 | CREATE FUNCTION tag2domain_get_tags_at_time_filtered(at_time timestamp, filter_type text, filter_value text) 145 | RETURNS TABLE( 146 | domain_id bigint, 147 | domain_name character varying(100), 148 | tag_type text, 149 | tag_id int, 150 | value_id int, 151 | start_time timestamp with time zone, 152 | measured_at timestamp with time zone, 153 | end_time timestamp with time zone 154 | ) AS $$ 155 | SELECT v_unified_tags.* FROM v_unified_tags 156 | JOIN v_tag2domain_domain_filter USING (domain_id) 157 | WHERE ( 158 | (v_unified_tags.start_ts <= $1) 159 | AND ((v_unified_tags.end_ts > $1) OR (v_unified_tags.end_ts IS NULL)) 160 | AND (v_tag2domain_domain_filter.start_ts AT TIME ZONE 'UTC' <= $1) 161 | AND ((v_tag2domain_domain_filter.end_ts AT TIME ZONE 'UTC' > $1) OR (v_tag2domain_domain_filter.end_ts IS NULL)) 162 | AND (v_tag2domain_domain_filter.tag_name = $2) 163 | AND (v_tag2domain_domain_filter.value = $3) 164 | ) 165 | $$ LANGUAGE SQL STABLE 166 | SET search_path TO tag2domain 167 | ; 168 | 169 | DROP FUNCTION IF EXISTS tag2domain_get_open_tags_domain; 170 | -- function tag2domain_get_open_tags_domain(domain_name) 171 | -- 172 | -- returns a table with the open tags for a single domain with name domain_name 173 | CREATE FUNCTION tag2domain_get_open_tags_domain(domain_name character varying (100)) 174 | RETURNS TABLE( 175 | domain_id bigint, 176 | domain_name character varying(100), 177 | tag_type text, 178 | tag_id int, 179 | value_id int, 180 | start_time timestamp with time zone, 181 | measured_at timestamp with time zone, 182 | end_time timestamp with time zone 183 | ) AS $$ 184 | SELECT v_unified_tags.* FROM v_unified_tags 185 | WHERE 186 | (v_unified_tags.end_ts IS NULL) 187 | AND (v_unified_tags.domain_name 
= $1) 188 | $$ LANGUAGE SQL STABLE 189 | SET search_path TO tag2domain 190 | ; 191 | 192 | DROP FUNCTION IF EXISTS tag2domain_get_tags_at_time_domain; 193 | -- function tag2domain_get_tags_at_time_domain(at_time, domain_name) 194 | -- 195 | -- returns a table with the tags set at time at_time for a single domain with name domain_name 196 | CREATE FUNCTION tag2domain_get_tags_at_time_domain(at_time timestamp, domain_name character varying (100)) 197 | RETURNS TABLE( 198 | domain_id bigint, 199 | domain_name character varying(100), 200 | tag_type text, 201 | tag_id int, 202 | value_id int, 203 | start_time timestamp with time zone, 204 | measured_at timestamp with time zone, 205 | end_time timestamp with time zone 206 | ) AS $$ 207 | SELECT * FROM v_unified_tags 208 | WHERE ( 209 | (v_unified_tags.start_ts <= $1) 210 | AND ((v_unified_tags.end_ts > $1) OR (v_unified_tags.end_ts IS NULL)) 211 | AND (v_unified_tags.domain_name = $2) 212 | ) 213 | $$ LANGUAGE SQL STABLE 214 | SET search_path TO tag2domain 215 | ; 216 | 217 | DROP FUNCTION IF EXISTS tag2domain_get_all_tags_domain; 218 | -- function tag2domain_get_all_tags_domain(domain_name) 219 | -- 220 | -- returns a table with all tags that were ever set for a single domain with name domain_name 221 | CREATE FUNCTION tag2domain_get_all_tags_domain(domain_name character varying (100)) 222 | RETURNS TABLE( 223 | domain_id bigint, 224 | domain_name character varying(100), 225 | tag_type text, 226 | tag_id int, 227 | value_id int, 228 | start_time timestamp with time zone, 229 | measured_at timestamp with time zone, 230 | end_time timestamp with time zone 231 | ) AS $$ 232 | SELECT v_unified_tags.* FROM v_unified_tags 233 | WHERE ( 234 | (v_unified_tags.domain_name = $1) 235 | ) 236 | $$ LANGUAGE SQL 237 | SET search_path TO tag2domain 238 | ; -------------------------------------------------------------------------------- /tests/db_mock_data/basic/tag2domain_db_test_schema.sql: 
-------------------------------------------------------------------------------- 1 | CREATE SCHEMA IF NOT EXISTS tag2domain; 2 | SET search_path TO tag2domain; 3 | 4 | CREATE SEQUENCE public.domain_seq; 5 | CREATE TABLE public.domains 6 | ( 7 | domain_id bigint NOT NULL DEFAULT nextval('public.domain_seq'::regclass), 8 | domain_name character varying(100) COLLATE pg_catalog."default", 9 | pdo character varying(10) COLLATE pg_catalog."default", 10 | u_label character varying(100) COLLATE pg_catalog."default", 11 | isidn boolean DEFAULT false, 12 | split boolean DEFAULT false, 13 | CONSTRAINT pk_domains PRIMARY KEY (domain_id), 14 | CONSTRAINT idx_domains_domain_name UNIQUE (domain_name) 15 | ) 16 | WITH ( 17 | OIDS = FALSE 18 | ) 19 | TABLESPACE pg_default; 20 | 21 | CREATE SEQUENCE public.registrar_seq; 22 | CREATE TABLE public.registrars 23 | ( 24 | registrar_id bigint NOT NULL DEFAULT nextval('public.registrar_seq'::regclass), 25 | registrar_name character varying(100) COLLATE pg_catalog."default", 26 | CONSTRAINT pk_registrars PRIMARY KEY (registrar_id), 27 | CONSTRAINT idx_registrars_registrar_name UNIQUE (registrar_name) 28 | ) 29 | WITH ( 30 | OIDS = FALSE 31 | ) 32 | TABLESPACE pg_default; 33 | 34 | CREATE TABLE domain_tags 35 | ( 36 | domain_id bigint, 37 | tag_id integer, 38 | start_date integer, 39 | end_date integer, 40 | taxonomy_id integer, 41 | value_id integer, 42 | measured_at timestamp with time zone DEFAULT now(), 43 | start_ts timestamp with time zone DEFAULT now(), 44 | end_ts timestamp with time zone, 45 | producer character varying(100) COLLATE pg_catalog."default" DEFAULT NULL, 46 | CONSTRAINT fk_delegation_tags_0_domains FOREIGN KEY (domain_id) 47 | REFERENCES public.domains (domain_id) MATCH SIMPLE 48 | ON UPDATE NO ACTION 49 | ON DELETE NO ACTION, 50 | CONSTRAINT fk_delegation_tags_0_tags FOREIGN KEY (tag_id) 51 | REFERENCES tags (tag_id) MATCH SIMPLE 52 | ON UPDATE NO ACTION 53 | ON DELETE NO ACTION 54 | ) 55 | WITH ( 56 | OIDS = FALSE 57 
| ) 58 | TABLESPACE pg_default; 59 | 60 | CREATE SEQUENCE public.delegation_seq; 61 | 62 | CREATE TABLE public.delegations 63 | ( 64 | delegation_id bigint NOT NULL DEFAULT nextval('public.delegation_seq'::regclass), 65 | domain_id bigint NOT NULL, 66 | registrar_id bigint NOT NULL, 67 | create_date integer, 68 | create_time time without time zone, 69 | purge_date integer, 70 | purge_time time without time zone, 71 | stichtag timestamp without time zone, 72 | price_class bigint, 73 | website_category_id bigint, 74 | ry_del_id bigint, 75 | create_ts timestamp without time zone, 76 | purge_ts timestamp without time zone, 77 | delegation_period tsrange, 78 | CONSTRAINT pk_delegations_0 PRIMARY KEY (delegation_id), 79 | CONSTRAINT idx_delegations_unique_0 UNIQUE (domain_id, create_date, create_time), 80 | CONSTRAINT fk_delegations_domains FOREIGN KEY (domain_id) 81 | REFERENCES public.domains (domain_id) MATCH SIMPLE 82 | ON UPDATE NO ACTION 83 | ON DELETE CASCADE 84 | ) 85 | 86 | TABLESPACE pg_default; 87 | 88 | CREATE TABLE delegation_tags 89 | ( 90 | delegation_id bigint, 91 | tag_id integer, 92 | start_date integer, 93 | end_date integer, 94 | taxonomy_id integer, 95 | value_id integer, 96 | measured_at timestamp with time zone DEFAULT now(), 97 | start_ts timestamp with time zone DEFAULT now(), 98 | end_ts timestamp with time zone DEFAULT now(), 99 | producer character varying(100) COLLATE pg_catalog."default" DEFAULT NULL, 100 | CONSTRAINT fk_delegation_tags_delegations FOREIGN KEY (delegation_id) 101 | REFERENCES public.delegations (delegation_id) MATCH SIMPLE 102 | ON UPDATE NO ACTION 103 | ON DELETE NO ACTION, 104 | CONSTRAINT fk_delegation_tags_tags FOREIGN KEY (tag_id) 105 | REFERENCES tags (tag_id) MATCH SIMPLE 106 | ON UPDATE NO ACTION 107 | ON DELETE NO ACTION 108 | ) 109 | 110 | TABLESPACE pg_default; 111 | 112 | 113 | CREATE TABLE intersections 114 | ( 115 | intxn_id bigint, 116 | pickerl_id integer, 117 | datum_start integer, 118 | datum_ende integer, 
119 | taxonomie_id integer, 120 | beschriftung_id integer, 121 | gemessen_um timestamp with time zone DEFAULT now(), 122 | zeitstempel_start timestamp with time zone DEFAULT now(), 123 | zeitstempel_ende timestamp with time zone DEFAULT now(), 124 | erzeuger character varying(100) COLLATE pg_catalog."default" DEFAULT NULL, 125 | CONSTRAINT fk_intersection_tags_tags FOREIGN KEY (pickerl_id) 126 | REFERENCES tags (tag_id) MATCH SIMPLE 127 | ON UPDATE NO ACTION 128 | ON DELETE NO ACTION 129 | ) 130 | 131 | TABLESPACE pg_default; -------------------------------------------------------------------------------- /tests/db_mock_data/test_cases/tags_categories.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO tags (tag_id, tag_name, tag_description, taxonomy_id, extras) VALUES (2000561, 'cat_1::test_tag_1_tax_4', 'test tag 1 for tax 4 (cat 1)', 4, '{}'); 2 | INSERT INTO tags (tag_id, tag_name, tag_description, taxonomy_id, extras) VALUES (2000562, 'cat_1::test_tag_2_tax_4', 'test tag 2 for tax 4 (cat 1)', 4, '{}'); 3 | INSERT INTO tags (tag_id, tag_name, tag_description, taxonomy_id, extras) VALUES (2000563, 'cat_2::test_tag_3_tax_4', 'test tag 3 for tax 4 (cat 2)', 4, '{}'); 4 | INSERT INTO tags (tag_id, tag_name, tag_description, taxonomy_id, extras) VALUES (2000564, 'test_tag_4_tax_4', 'test tag 4 for tax 4 (no cat)', 4, '{}'); 5 | 6 | INSERT INTO domain_tags (domain_id, tag_id, start_date, end_date, taxonomy_id, value_id, measured_at, start_ts, end_ts, producer) 7 | VALUES (1, 2000561, '20200317', null, 1, null, to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), null, 'test_producer1'); 8 | INSERT INTO domain_tags (domain_id, tag_id, start_date, end_date, taxonomy_id, value_id, measured_at, start_ts, end_ts, producer) 9 | VALUES (1, 2000562, '20200317', null, 1, null, to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), 
to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), null, 'test_producer1'); 10 | 11 | INSERT INTO domain_tags (domain_id, tag_id, start_date, end_date, taxonomy_id, value_id, measured_at, start_ts, end_ts, producer) 12 | VALUES (1, 2000563, '20200317', null, 1, null, to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), null, 'test_producer1'); 13 | 14 | INSERT INTO domain_tags (domain_id, tag_id, start_date, end_date, taxonomy_id, value_id, measured_at, start_ts, end_ts, producer) 15 | VALUES (2, 2000563, '20200317', null, 1, null, to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), null, 'test_producer1'); 16 | 17 | INSERT INTO domain_tags (domain_id, tag_id, start_date, end_date, taxonomy_id, value_id, measured_at, start_ts, end_ts, producer) 18 | VALUES (1, 2000564, '20200317', null, 1, null, to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), null, 'test_producer1'); -------------------------------------------------------------------------------- /tests/db_mock_data/test_cases/version_tags.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO taxonomy_tag_val (id, value, tag_id) VALUES (100001, '1.0.2', 2); 2 | INSERT INTO taxonomy_tag_val (id, value, tag_id) VALUES (100002, '2.1.0', 2); 3 | 4 | INSERT INTO domain_tags (domain_id, tag_id, start_date, end_date, taxonomy_id, value_id, measured_at, start_ts, end_ts, producer) 5 | VALUES (1, 2, '20200317', null, 1, 100001, to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), null, 'test_producer1'); 6 | INSERT INTO domain_tags (domain_id, tag_id, start_date, end_date, taxonomy_id, value_id, measured_at, start_ts, end_ts, producer) 7 | VALUES (2, 2, '20200317', null, 1, 100002, 
to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), null, 'test_producer1'); 8 | INSERT INTO domain_tags (domain_id, tag_id, start_date, end_date, taxonomy_id, value_id, measured_at, start_ts, end_ts, producer) 9 | VALUES (3, 2, '20200317', '20200820', 1, 100002, to_timestamp('2020-08-20 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-03-17 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), to_timestamp('2020-08-20 12:53:21', 'YYYY-MM-DD HH24:MI:SS'), 'test_producer1'); -------------------------------------------------------------------------------- /tests/env.example: -------------------------------------------------------------------------------- 1 | # username - must be the same as specified in tests/config/db.cfg 2 | POSTGRES_USER= 3 | 4 | # password - must be the same as specified in tests/config/db.cfg 5 | POSTGRES_PASSWORD= 6 | 7 | TAG2DOMAIN_SCHEMA=tag2domain -------------------------------------------------------------------------------- /tests/tests_py_tag2domain/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/certtools/tag2domain/09386ac1f1935b06a7d136eed96973ac824e91be/tests/tests_py_tag2domain/__init__.py -------------------------------------------------------------------------------- /tests/tests_py_tag2domain/db_test_classes.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from py_tag2domain.db import Psycopg2Adapter 4 | from tests.util import PostgresReadOnlyDBTest, PostgresAutoDBTest 5 | 6 | 7 | class PostgresReadOnlyPsycopgAdapterTest(PostgresReadOnlyDBTest): 8 | @classmethod 9 | def setUpClass(cls): 10 | print("setting up class PostgresReadOnlyPsycopgAdapterTest") 11 | super(PostgresReadOnlyPsycopgAdapterTest, cls).setUpClass() 12 | cls.adapter = Psycopg2Adapter( 13 | cls.db_connection, 14 | cls.intxn_table_mappings 15 |
) 16 | 17 | @classmethod 18 | def tearDownClass(cls): 19 | print("tearing down class PostgresReadOnlyPsycopgAdapterTest") 20 | super(PostgresReadOnlyPsycopgAdapterTest, cls).tearDownClass() 21 | 22 | def setUp(self): 23 | print("setting up instance of PostgresReadOnlyPsycopgAdapterTest") 24 | super(PostgresReadOnlyPsycopgAdapterTest, self).setUp() 25 | 26 | 27 | class PostgresPsycopgAdapterAutoDBTest(PostgresAutoDBTest): 28 | @classmethod 29 | def setUpClass(cls): 30 | print("setting up class PostgresPsycopgAdapterAutoDBTest") 31 | super(PostgresPsycopgAdapterAutoDBTest, cls).setUpClass() 32 | 33 | def setUp(self): 34 | print("setting up instance of PostgresPsycopgAdapterAutoDBTest") 35 | super(PostgresPsycopgAdapterAutoDBTest, self).setUp() 36 | self.adapter = Psycopg2Adapter( 37 | self.db_connection, 38 | self.__class__.intxn_table_mappings 39 | ) 40 | 41 | def tearDown(self): 42 | print("tearing down instance of PostgresPsycopgAdapterAutoDBTest") 43 | super(PostgresPsycopgAdapterAutoDBTest, self).tearDown() 44 | -------------------------------------------------------------------------------- /tests/tests_py_tag2domain/test_util.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from py_tag2domain.util import calc_changes 3 | 4 | 5 | class UtilTests(TestCase): 6 | def test_calc_changes(self): 7 | from_ = set([82, 71]) 8 | to = set([102, 71]) 9 | changes = calc_changes(from_, to) 10 | 11 | self.assertSetEqual(set(changes['insert']), set([102, ])) 12 | self.assertSetEqual(set(changes['prolong']), set([71, ])) 13 | self.assertSetEqual(set(changes['end']), set([82, ])) 14 | -------------------------------------------------------------------------------- /tests/tests_tag2domain_api/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/certtools/tag2domain/09386ac1f1935b06a7d136eed96973ac824e91be/tests/tests_tag2domain_api/__init__.py -------------------------------------------------------------------------------- /tests/tests_tag2domain_api/db_test_classes.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from tag2domain_api.app.util.db import set_db 4 | 5 | from tests.util import ( 6 | config as tag2domain_test_config, 7 | PostgresReadOnlyDBTest, 8 | PostgresAutoDBTest 9 | ) 10 | 11 | 12 | class APIReadOnlyTest(PostgresReadOnlyDBTest): 13 | @classmethod 14 | def setUpClass(cls): 15 | print("setting up class APIReadOnlyTest") 16 | super(APIReadOnlyTest, cls).setUpClass() 17 | cls.no_db_flag = tag2domain_test_config is None 18 | if not cls.no_db_flag: 19 | set_db(cls.db_connection) 20 | 21 | @classmethod 22 | def tearDownClass(cls): 23 | print("tearing down class APIReadOnlyTest") 24 | super(APIReadOnlyTest, cls).tearDownClass() 25 | 26 | def setUp(self): 27 | print("setting up instance of APIReadOnlyTest") 28 | super(APIReadOnlyTest, self).setUp() 29 | 30 | 31 | class APIWithAdditionalDBDataTest(PostgresAutoDBTest): 32 | def setUp(self, *args): 33 | if len(args) == 0: 34 | raise ValueError("no DB test cases specified") 35 | 36 | print("setting up instance of APIReadOnlyTest") 37 | super(APIWithAdditionalDBDataTest, self).setUp() 38 | 39 | for name in args: 40 | print("loading additional db dataset '%s'" % name) 41 | self.loadTestCase(name) 42 | 43 | set_db(self.db_connection) 44 | 45 | 46 | class APIWriteTest(PostgresAutoDBTest): 47 | def setUp(self): 48 | print("setting up instance of APIWriteTest") 49 | super(APIWriteTest, self).setUp() 50 | 51 | set_db(self.db_connection) 52 | -------------------------------------------------------------------------------- /tests/tests_tag2domain_api/test_bydomain_endpoints.py: 
-------------------------------------------------------------------------------- 1 | from fastapi.testclient import TestClient 2 | 3 | import pprint 4 | 5 | from tag2domain_api.app.main import app 6 | 7 | from .db_test_classes import APIReadOnlyTest 8 | 9 | pprinter = pprint.PrettyPrinter(indent=4) 10 | client = TestClient(app) 11 | 12 | 13 | class ByDomainEndpointsTest(APIReadOnlyTest): 14 | def test_bydomain_open_tags(self): 15 | response = client.get("/api/v1/bydomain/test1.at") 16 | assert response.status_code == 200 17 | assert response.json() == DOMAIN_TEST1_OPEN_TAGS 18 | 19 | def test_bydomain_tags_history(self): 20 | response = client.get("/api/v1/bydomain/test1.at/history") 21 | assert response.status_code == 200 22 | assert response.json() == DOMAIN_TEST1_TAG_HISTORY 23 | 24 | def test_bydomain_tags_nonexisting(self): 25 | response = client.get("/api/v1/bydomain/test_nonexisting.at") 26 | assert response.status_code == 200 27 | assert response.json() == [] 28 | 29 | def test_bydomain_tags_history_nonexisting(self): 30 | response = client.get("/api/v1/bydomain/test_nonexisting.at/history") 31 | assert response.status_code == 200 32 | assert response.json() == [] 33 | 34 | 35 | DOMAIN_TEST1_OPEN_TAGS = list(sorted([ 36 | { 37 | 'end_time': None, 38 | 'measured_at': '2020-03-17T12:53:21+00:00', 39 | 'start_time': '2020-03-17T12:53:21+00:00', 40 | 'tag_id': 1, 41 | 'tag_name': 'test_tag_1_tax_1', 42 | 'taxonomy_id': 1, 43 | 'taxonomy_name': 'tax_test1', 44 | 'value': None, 45 | 'value_id': None 46 | }, 47 | { 48 | 'end_time': None, 49 | 'measured_at': '2020-06-30T20:51:36+00:00', 50 | 'start_time': '2020-04-25T18:21:00+00:00', 51 | 'tag_id': 2, 52 | 'tag_name': 'test_tag_2_tax_1', 53 | 'taxonomy_id': 1, 54 | 'taxonomy_name': 'tax_test1', 55 | 'value': None, 56 | 'value_id': None 57 | }, 58 | { 59 | 'end_time': None, 60 | 'measured_at': '2020-06-30T20:51:36+00:00', 61 | 'start_time': '2020-04-25T18:21:00+00:00', 62 | 'tag_id': 4, 63 | 'tag_name': 
'test_tag_1_tax_3', 64 | 'taxonomy_id': 3, 65 | 'taxonomy_name': 'tax_test3', 66 | 'value': None, 67 | 'value_id': None 68 | }, 69 | ] * 3, key=lambda tag: tag["tag_id"])) 70 | 71 | DOMAIN_TEST1_TAG_HISTORY = list(sorted([ 72 | { 73 | 'end_time': None, 74 | 'measured_at': '2020-03-17T12:53:21+00:00', 75 | 'start_time': '2020-03-17T12:53:21+00:00', 76 | 'tag_id': 1, 77 | 'tag_name': 'test_tag_1_tax_1', 78 | 'taxonomy_id': 1, 79 | 'taxonomy_name': 'tax_test1', 80 | 'value': None, 81 | 'value_id': None 82 | }, 83 | { 84 | 'end_time': None, 85 | 'measured_at': '2020-06-30T20:51:36+00:00', 86 | 'start_time': '2020-04-25T18:21:00+00:00', 87 | 'tag_id': 2, 88 | 'tag_name': 'test_tag_2_tax_1', 89 | 'taxonomy_id': 1, 90 | 'taxonomy_name': 'tax_test1', 91 | 'value': None, 92 | 'value_id': None 93 | }, 94 | { 95 | 'end_time': None, 96 | 'measured_at': '2020-06-30T20:51:36+00:00', 97 | 'start_time': '2020-04-25T18:21:00+00:00', 98 | 'tag_id': 4, 99 | 'tag_name': 'test_tag_1_tax_3', 100 | 'taxonomy_id': 3, 101 | 'taxonomy_name': 'tax_test3', 102 | 'value': None, 103 | 'value_id': None 104 | }, 105 | { 106 | 'end_time': '2020-07-10T14:20:00+00:00', 107 | 'measured_at': '2020-07-10T14:20:00+00:00', 108 | 'start_time': '2020-04-25T18:21:00+00:00', 109 | 'tag_id': 3, 110 | 'tag_name': 'test_tag_3_tax_1', 111 | 'taxonomy_id': 1, 112 | 'taxonomy_name': 'tax_test1', 113 | 'value': None, 114 | 'value_id': None 115 | } 116 | ] * 3, key=lambda tag: tag["tag_id"])) 117 | -------------------------------------------------------------------------------- /tests/tests_tag2domain_api/test_db.py: -------------------------------------------------------------------------------- 1 | import pprint 2 | from unittest import TestCase 3 | 4 | import psycopg2 5 | 6 | import tag2domain_api.app.util.db 7 | from tag2domain_api.app.util.db import ( 8 | connect_db, 9 | disconnect_db, 10 | execute_db, 11 | set_db, 12 | get_db, 13 | get_db_cursor, 14 | get_db_dict_cursor, 15 | get_sql_base_table 16 | ) 17 | 18 
| from py_tag2domain.db import Psycopg2Adapter 19 | 20 | from tests.util import parse_test_db_config 21 | 22 | pprinter = pprint.PrettyPrinter(indent=4) 23 | 24 | 25 | class DBConnectionTest(TestCase): 26 | def test_connect_db(self): 27 | config, _ = parse_test_db_config() 28 | db_conn = connect_db(config) 29 | self.assertIsInstance(db_conn, psycopg2.extensions.connection) 30 | disconnect_db() 31 | assert tag2domain_api.app.util.db._db_conn is None 32 | 33 | def test_reconnect_db(self): 34 | config, _ = parse_test_db_config() 35 | old_db_conn = connect_db(config) 36 | self.assertIsInstance(old_db_conn, psycopg2.extensions.connection) 37 | db_conn = connect_db(config) 38 | self.assertIsInstance(db_conn, psycopg2.extensions.connection) 39 | 40 | self.assertRaisesRegex( 41 | psycopg2.Error, 42 | "connection already closed", 43 | old_db_conn.cursor 44 | ) 45 | 46 | rows = execute_db("SELECT 1;") 47 | 48 | assert len(rows) == 1 49 | assert rows[0][0] == 1 50 | disconnect_db() 51 | 52 | def test_connect_db_fails_when_set_db_used(self): 53 | config, _ = parse_test_db_config() 54 | old_db_conn = connect_db(config) 55 | connection_args = Psycopg2Adapter.to_psycopg_args(config) 56 | 57 | conn = psycopg2.connect(**connection_args) 58 | set_db(conn) 59 | 60 | old_db_conn.close() 61 | 62 | self.assertRaisesRegex( 63 | ValueError, 64 | "No config given", 65 | connect_db 66 | ) 67 | 68 | def test_set_db(self): 69 | config, _ = parse_test_db_config() 70 | connection_args = Psycopg2Adapter.to_psycopg_args(config) 71 | conn = psycopg2.connect(**connection_args) 72 | set_db(conn) 73 | 74 | db_conn = get_db() 75 | self.assertIsInstance(db_conn, psycopg2.extensions.connection) 76 | 77 | assert tag2domain_api.app.util.db._db_config is None 78 | assert tag2domain_api.app.util.db._config is None 79 | 80 | def test_get_db_cursor_fails_on_not_connected(self): 81 | self.assertRaises(RuntimeError, get_db_cursor) 82 | 83 | def test_get_db_cursor(self): 84 | config, _ = parse_test_db_config() 
85 | connection_args = Psycopg2Adapter.to_psycopg_args(config) 86 | conn = psycopg2.connect(**connection_args) 87 | set_db(conn) 88 | 89 | cursor = get_db_cursor() 90 | self.assertIsInstance(cursor, psycopg2.extensions.cursor) 91 | cursor.execute("SELECT 1;") 92 | disconnect_db() 93 | 94 | def test_get_db_dict_cursor_fails_on_not_connected(self): 95 | self.assertRaises(RuntimeError, get_db_dict_cursor) 96 | 97 | def test_get_db_dict_cursor(self): 98 | config, _ = parse_test_db_config() 99 | connection_args = Psycopg2Adapter.to_psycopg_args(config) 100 | conn = psycopg2.connect(**connection_args) 101 | set_db(conn) 102 | 103 | cursor = get_db_dict_cursor() 104 | self.assertIsInstance(cursor, psycopg2.extensions.cursor) 105 | cursor.execute("SELECT 1;") 106 | disconnect_db() 107 | 108 | def test_execute_db_correct_stmt(self): 109 | config, _ = parse_test_db_config() 110 | connection_args = Psycopg2Adapter.to_psycopg_args(config) 111 | conn = psycopg2.connect(**connection_args) 112 | set_db(conn) 113 | 114 | rows = execute_db("SELECT 1;") 115 | 116 | assert len(rows) == 1 117 | assert rows[0][0] == 1 118 | disconnect_db() 119 | 120 | def test_execute_db_correct_stmt_on_closed_connection(self): 121 | config, _ = parse_test_db_config() 122 | connect_db(config) 123 | tag2domain_api.app.util.db._db_conn.close() 124 | 125 | rows = execute_db("SELECT 1;") 126 | 127 | assert len(rows) == 1 128 | assert rows[0][0] == 1 129 | disconnect_db() 130 | 131 | def test_execute_failing_statement(self): 132 | config, _ = parse_test_db_config() 133 | connect_db(config) 134 | tag2domain_api.app.util.db._db_conn.close() 135 | 136 | self.assertRaises( 137 | RuntimeError, 138 | execute_db, 139 | "SELECT * FROM some_phantasy_table;" 140 | ) 141 | 142 | disconnect_db() 143 | 144 | def test_get_sql_base_table_filter_and_domain_fail(self): 145 | self.assertRaises( 146 | ValueError, 147 | get_sql_base_table, 148 | "2020-01-01T12:00:00", 149 | filter="tag=val", 150 | domain="test.at" 151 | ) 152 
| 153 | def test_get_sql_base_table_invalid_filter_fail(self): 154 | self.assertRaisesRegex( 155 | ValueError, 156 | "invalid filter clause - .+", 157 | get_sql_base_table, 158 | "2020-01-01T12:00:00", 159 | filter="_.," 160 | ) 161 | 162 | def test_get_sql_base_table_filter_by_tag_fail(self): 163 | self.assertRaisesRegex( 164 | ValueError, 165 | "filtering by tags is not implemented", 166 | get_sql_base_table, 167 | "2020-01-01T12:00:00", 168 | filter="taxonomy:tag=value" 169 | ) 170 | 171 | def test_get_sql_base_table_filter_missing_value_fail(self): 172 | self.assertRaisesRegex( 173 | ValueError, 174 | "value is required for filtering by non-tags", 175 | get_sql_base_table, 176 | "2020-01-01T12:00:00", 177 | filter="tag" 178 | ) 179 | -------------------------------------------------------------------------------- /tests/tests_tag2domain_api/test_filters_endpoints.py: -------------------------------------------------------------------------------- 1 | from fastapi.testclient import TestClient 2 | 3 | import pprint 4 | from urllib.parse import urlencode 5 | 6 | from tag2domain_api.app.main import app 7 | 8 | from .db_test_classes import APIReadOnlyTest 9 | 10 | pprinter = pprint.PrettyPrinter(indent=4) 11 | client = TestClient(app) 12 | 13 | 14 | class FiltersEndpointsTest(APIReadOnlyTest): 15 | def test_get_types(self): 16 | response = client.get("/api/v1/filters/types") 17 | pprinter.pprint(response.json()) 18 | assert response.status_code == 200 19 | assert response.json() == ["registrar-id", ] 20 | 21 | def test_get_values_type_exists(self): 22 | query = {"filter": "registrar-id"} 23 | response = client.get("/api/v1/filters/values/?%s" % urlencode(query)) 24 | pprinter.pprint(response.json()) 25 | assert response.status_code == 200 26 | assert response.json() == ['1', '2', '3'] 27 | 28 | def test_get_values_type_none_exist(self): 29 | query = {"filter": "some-other-filter"} 30 | response = client.get("/api/v1/filters/values/?%s" % urlencode(query)) 31 | 
pprinter.pprint(response.json()) 32 | assert response.status_code == 200 33 | assert response.json() == [] 34 | -------------------------------------------------------------------------------- /tests/tests_tag2domain_api/test_meta_endpoints.py: -------------------------------------------------------------------------------- 1 | from fastapi.testclient import TestClient 2 | 3 | import pprint 4 | from urllib.parse import urlencode 5 | 6 | from tag2domain_api.app.main import app 7 | 8 | from .db_test_classes import APIReadOnlyTest, APIWithAdditionalDBDataTest 9 | 10 | pprinter = pprint.PrettyPrinter(indent=4) 11 | client = TestClient(app) 12 | 13 | 14 | class MetaEndpointsTest(APIReadOnlyTest): 15 | def test_get_taxonomies(self): 16 | response = client.get("/api/v1/meta/taxonomies") 17 | pprinter.pprint(response.json()) 18 | assert response.status_code == 200 19 | assert response.json() == [ 20 | { 21 | 'description': 'test taxonomie 1', 22 | 'for_domains': True, 23 | 'for_numbers': True, 24 | 'id': 1, 25 | 'is_actionable': 1.0, 26 | 'is_automatically_classifiable': 1.0, 27 | 'is_stable': 0.0, 28 | 'name': 'tax_test1', 29 | 'url': 'test.at/test_taxonomie_1'}, 30 | { 31 | 'description': 'test taxonomie 2', 32 | 'for_domains': True, 33 | 'for_numbers': True, 34 | 'id': 2, 35 | 'is_actionable': 1.0, 36 | 'is_automatically_classifiable': 1.0, 37 | 'is_stable': 0.0, 38 | 'name': 'tax_test2', 39 | 'url': 'test.at/test_taxonomie_2'}, 40 | { 41 | 'description': 'test taxonomie 3', 42 | 'for_domains': True, 43 | 'for_numbers': True, 44 | 'id': 3, 45 | 'is_actionable': 0.5, 46 | 'is_automatically_classifiable': 1.0, 47 | 'is_stable': 0.0, 48 | 'name': 'tax_test3', 49 | 'url': 'test.at/test_taxonomie_3'}, 50 | { 51 | 'description': 'test taxonomie 4', 52 | 'for_domains': True, 53 | 'for_numbers': True, 54 | 'id': 4, 55 | 'is_actionable': None, 56 | 'is_automatically_classifiable': 1.0, 57 | 'is_stable': 0.0, 58 | 'name': 'tax_test4', 59 | 'url': 'test.at/test_taxonomie_4' 60 | } 
61 | ] 62 | 63 | def test_get_tags_all(self): 64 | response = client.get("/api/v1/meta/tags") 65 | pprinter.pprint(response.json()) 66 | assert response.status_code == 200 67 | assert response.json() == [ 68 | { 69 | 'extras': {}, 70 | 'tag_description': 'test tag 1 for tax 1', 71 | 'tag_id': 1, 72 | 'tag_name': 'test_tag_1_tax_1', 73 | 'taxonomy_id': 1 74 | }, 75 | { 76 | 'extras': {}, 77 | 'tag_description': 'test tag 2 for tax 1', 78 | 'tag_id': 2, 79 | 'tag_name': 'test_tag_2_tax_1', 80 | 'taxonomy_id': 1 81 | }, 82 | { 83 | 'extras': {}, 84 | 'tag_description': 'test tag 3 for tax 1', 85 | 'tag_id': 3, 86 | 'tag_name': 'test_tag_3_tax_1', 87 | 'taxonomy_id': 1 88 | }, 89 | { 90 | 'extras': {}, 91 | 'tag_description': 'test tag 1 for tax 3', 92 | 'tag_id': 4, 93 | 'tag_name': 'test_tag_1_tax_3', 94 | 'taxonomy_id': 3 95 | } 96 | ] 97 | 98 | def test_get_values_existing(self): 99 | query = { 100 | "taxonomy": "tax_test1", 101 | "tag": "test_tag_1_tax_1" 102 | } 103 | response = client.get("/api/v1/meta/values?%s" % urlencode(query)) 104 | pprinter.pprint(response.json()) 105 | assert response.status_code == 200 106 | assert response.json() == [ 107 | {'value': 'value_1_tag_1', 'value_id': 1}, 108 | {'value': 'value_2_tag_1', 'value_id': 2} 109 | ] 110 | 111 | def test_get_values_non_existing(self): 112 | query = { 113 | "taxonomy": "tax_test1", 114 | "tag": "test_tag_2_tax_1" 115 | } 116 | response = client.get("/api/v1/meta/values?%s" % urlencode(query)) 117 | pprinter.pprint(response.json()) 118 | assert response.status_code == 200 119 | assert response.json() == [] 120 | 121 | def test_get_tag_info_existing(self): 122 | query = { 123 | "taxonomy": "tax_test1", 124 | "tag": "test_tag_1_tax_1" 125 | } 126 | response = client.get("/api/v1/meta/tag?%s" % urlencode(query)) 127 | pprinter.pprint(response.json()) 128 | assert response.status_code == 200 129 | assert response.json() == { 130 | 'tag': { 131 | 'category': None, 132 | 'description': 'test tag 1 for tax 
1', 133 | 'extras': {}, 134 | 'name': 'test_tag_1_tax_1' 135 | }, 136 | 'taxonomy': { 137 | 'description': 'test taxonomie 1', 138 | 'flags': { 139 | 'allows_auto_tags': False, 140 | 'allows_auto_values': False, 141 | 'for_domains': True, 142 | 'for_numbers': True, 143 | 'is_actionable': 1.0, 144 | 'is_automatically_classifiable': True, 145 | 'is_stable': True}, 146 | 'name': 'tax_test1', 147 | 'url': 'test.at/test_taxonomie_1' 148 | }, 149 | 'values': {'count': 2} 150 | } 151 | 152 | def test_get_tag_info_non_existing(self): 153 | query = { 154 | "taxonomy": "tax_test1", 155 | "tag": "test_tag_non_existing" 156 | } 157 | response = client.get("/api/v1/meta/tag?%s" % urlencode(query)) 158 | assert response.status_code == 404 159 | 160 | 161 | class MetaEndpointsWithCategoriesTest(APIWithAdditionalDBDataTest): 162 | def setUp(self): 163 | super(MetaEndpointsWithCategoriesTest, self).setUp("tags_categories") 164 | 165 | def test_get_categories_no_cats_exist(self): 166 | query = { 167 | "taxonomy": "tax_test1" 168 | } 169 | response = client.get("/api/v1/meta/categories?%s" % urlencode(query)) 170 | pprinter.pprint(response.json()) 171 | assert response.status_code == 200 172 | assert response.json() == [""] 173 | 174 | def test_get_categories_cats_exist(self): 175 | query = { 176 | "taxonomy": "tax_test4" 177 | } 178 | response = client.get("/api/v1/meta/categories?%s" % urlencode(query)) 179 | pprinter.pprint(response.json()) 180 | assert response.status_code == 200 181 | assert set(response.json()) == set([ 182 | "", 183 | "cat_1", 184 | "cat_2" 185 | ]) 186 | 187 | def test_get_all_categories(self): 188 | query = {} 189 | response = client.get("/api/v1/meta/categories?%s" % urlencode(query)) 190 | pprinter.pprint(response.json()) 191 | assert response.status_code == 200 192 | assert set(response.json()) == set([ 193 | "", 194 | "cat_1", 195 | "cat_2" 196 | ]) 197 | 198 | def test_get_tags_cat_1(self): 199 | query = { 200 | "taxonomy": "tax_test4", 201 | 
"category": "cat_1" 202 | } 203 | response = client.get("/api/v1/meta/tags?%s" % urlencode(query)) 204 | pprinter.pprint(response.json()) 205 | assert response.status_code == 200 206 | assert response.json() == [ 207 | { 208 | 'extras': {}, 209 | 'tag_description': 'test tag 1 for tax 4 (cat 1)', 210 | 'tag_id': 2000561, 211 | 'tag_name': 'cat_1::test_tag_1_tax_4', 212 | 'taxonomy_id': 4 213 | }, 214 | { 215 | 'extras': {}, 216 | 'tag_description': 'test tag 2 for tax 4 (cat 1)', 217 | 'tag_id': 2000562, 218 | 'tag_name': 'cat_1::test_tag_2_tax_4', 219 | 'taxonomy_id': 4 220 | } 221 | ] 222 | 223 | def test_get_tags_no_cat(self): 224 | query = { 225 | "taxonomy": "tax_test4", 226 | "category": "" 227 | } 228 | response = client.get("/api/v1/meta/tags?%s" % urlencode(query)) 229 | pprinter.pprint(response.json()) 230 | assert response.status_code == 200 231 | assert response.json() == [ 232 | { 233 | 'extras': {}, 234 | 'tag_description': 'test tag 4 for tax 4 (no cat)', 235 | 'tag_id': 2000564, 236 | 'tag_name': 'test_tag_4_tax_4', 237 | 'taxonomy_id': 4 238 | } 239 | ] 240 | 241 | def test_get_tags_all_tax_test_4(self): 242 | query = { 243 | "taxonomy": "tax_test4" 244 | } 245 | response = client.get("/api/v1/meta/tags?%s" % urlencode(query)) 246 | pprinter.pprint(response.json()) 247 | assert response.status_code == 200 248 | assert response.json() == [ 249 | { 250 | 'extras': {}, 251 | 'tag_description': 'test tag 1 for tax 4 (cat 1)', 252 | 'tag_id': 2000561, 253 | 'tag_name': 'cat_1::test_tag_1_tax_4', 254 | 'taxonomy_id': 4 255 | }, 256 | { 257 | 'extras': {}, 258 | 'tag_description': 'test tag 2 for tax 4 (cat 1)', 259 | 'tag_id': 2000562, 260 | 'tag_name': 'cat_1::test_tag_2_tax_4', 261 | 'taxonomy_id': 4 262 | }, 263 | { 264 | 'tag_id': 2000563, 265 | 'tag_name': 'cat_2::test_tag_3_tax_4', 266 | 'tag_description': 'test tag 3 for tax 4 (cat 2)', 267 | 'taxonomy_id': 4, 268 | 'extras': {} 269 | }, 270 | { 271 | 'extras': {}, 272 | 'tag_description': 'test 
tag 4 for tax 4 (no cat)', 273 | 'tag_id': 2000564, 274 | 'tag_name': 'test_tag_4_tax_4', 275 | 'taxonomy_id': 4 276 | } 277 | ] 278 | 279 | def test_get_tags_fail_missing_taxonomy(self): 280 | query = { 281 | "category": "cat_1" 282 | } 283 | response = client.get("/api/v1/meta/tags?%s" % urlencode(query)) 284 | assert response.status_code == 400 285 | -------------------------------------------------------------------------------- /tests/tests_tag2domain_api/test_msm2tag_endpoints.py: -------------------------------------------------------------------------------- 1 | from fastapi.testclient import TestClient 2 | 3 | import pprint 4 | import psycopg2.extras 5 | import datetime 6 | 7 | from .db_test_classes import APIWriteTest 8 | from tests.util import parse_test_db_config 9 | 10 | from tag2domain_api.app.main import app 11 | 12 | pprinter = pprint.PrettyPrinter(indent=4) 13 | client = TestClient(app) 14 | 15 | 16 | _, INTXN_TABLE_MAPPINGS = parse_test_db_config() 17 | 18 | 19 | class MSM2TagEndpointMSM2TAGTest(APIWriteTest): 20 | def test_post_measurement(self): 21 | msm = { 22 | "version": "1", 23 | "tag_type": "intersection", 24 | "tagged_id": 3, 25 | "taxonomy": "tax_test1", 26 | "producer": "test", 27 | "measured_at": "2020-12-22T12:35:32", 28 | "measurement_id": "test/12345", 29 | "tags": [ 30 | { 31 | "tag": "test_tag_1_tax_1", 32 | "value": "value_1_tag_1" 33 | }, 34 | { 35 | "tag": "test_tag_3_tax_1" 36 | }, 37 | ] 38 | } 39 | response = client.post( 40 | "/api/v1/msm2tag/", 41 | json=msm 42 | ) 43 | pprinter.pprint(response.json()) 44 | assert response.status_code == 200 45 | assert response.json() == {"message": "OK"} 46 | 47 | cursor = self.db_connection.cursor( 48 | cursor_factory=psycopg2.extras.RealDictCursor 49 | ) 50 | 51 | sql = ( 52 | """ 53 | SELECT 54 | %(id)s AS id, 55 | %(taxonomy_id)s AS taxonomy_id, 56 | %(tag_id)s AS tag_id, 57 | %(value_id)s AS value_id, 58 | %(measured_at)s AS measured_at, 59 | %(producer)s AS producer, 60 | 
%(start_date)s AS start_date, 61 | %(end_date)s AS end_date, 62 | %(start_ts)s AS start_ts, 63 | %(end_ts)s AS end_ts 64 | FROM %(table_name)s 65 | WHERE 66 | %(id)s = 3 67 | AND (%(taxonomy_id)s = 1) 68 | AND (%(id)s = 3) 69 | """ % INTXN_TABLE_MAPPINGS["intersection"] 70 | ) 71 | 72 | cursor.execute(sql) 73 | 74 | rows = cursor.fetchall() 75 | 76 | assert len(rows) == 2 77 | 78 | rows = [ 79 | {key: val for key, val in _d.items()} 80 | for _d in rows 81 | ] 82 | pprinter.pprint(rows) 83 | assert rows == [ 84 | { 85 | 'end_date': None, 86 | 'end_ts': None, 87 | 'id': 3, 88 | 'measured_at': datetime.datetime(2020, 12, 22, 12, 35, 32, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 89 | 'producer': 'test', 90 | 'start_date': 20201222, 91 | 'start_ts': datetime.datetime(2020, 12, 22, 12, 35, 32, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 92 | 'tag_id': 1, 93 | 'taxonomy_id': 1, 94 | 'value_id': 1 95 | }, 96 | { 97 | 'end_date': None, 98 | 'end_ts': None, 99 | 'id': 3, 100 | 'measured_at': datetime.datetime(2020, 12, 22, 12, 35, 32, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 101 | 'producer': 'test', 102 | 'start_date': 20201222, 103 | 'start_ts': datetime.datetime(2020, 12, 22, 12, 35, 32, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 104 | 'tag_id': 3, 105 | 'taxonomy_id': 1, 106 | 'value_id': None 107 | } 108 | ] 109 | 110 | def test_post_measurement_fail_empty_msm(self): 111 | msm = {} 112 | response = client.post( 113 | "/api/v1/msm2tag/", 114 | json=msm 115 | ) 116 | pprinter.pprint(response.json()) 117 | assert response.status_code == 422 118 | 119 | def test_post_measurement_fail_invalid_tag_type(self): 120 | msm = { 121 | "version": "1", 122 | "tag_type": "phantasy_tag_type", 123 | "tagged_id": 3, 124 | "taxonomy": "tax_test1", 125 | "producer": "test", 126 | "measured_at": "2020-12-22T12:35:32", 127 | "measurement_id": "test/12345", 128 | "tags": [ 129 | { 130 | "tag": "test_tag_1_tax_1", 
131 | "value": "value_1_tag_1" 132 | }, 133 | { 134 | "tag": "test_tag_3_tax_1" 135 | }, 136 | ] 137 | } 138 | response = client.post( 139 | "/api/v1/msm2tag/", 140 | json=msm 141 | ) 142 | pprinter.pprint(response.json()) 143 | assert response.status_code == 400 144 | -------------------------------------------------------------------------------- /tests/tests_tag2domain_api/test_stats_endpoints.py: -------------------------------------------------------------------------------- 1 | from fastapi.testclient import TestClient 2 | 3 | import pprint 4 | from urllib.parse import urlencode 5 | 6 | from tag2domain_api.app.main import app 7 | 8 | from .db_test_classes import APIReadOnlyTest, APIWithAdditionalDBDataTest 9 | 10 | pprinter = pprint.PrettyPrinter(indent=4) 11 | client = TestClient(app) 12 | 13 | 14 | class StatsEndpointTaxonomiesTest(APIReadOnlyTest): 15 | def test_taxonomy_stats_all(self): 16 | response = client.get("/api/v1/stats/taxonomies") 17 | pprinter.pprint(response.json()) 18 | assert response.status_code == 200 19 | assert response.json() == [ 20 | {'count': 2, 'taxonomy_name': 'tax_test1'}, 21 | {'count': 1, 'taxonomy_name': 'tax_test3'} 22 | ] 23 | 24 | def test_taxonomy_stats_past(self): 25 | query = { 26 | 'at_time': '2020-10-01T12:00:00' 27 | } 28 | response = client.get("/api/v1/stats/taxonomies?%s" % urlencode(query)) 29 | pprinter.pprint(response.json()) 30 | assert response.status_code == 200 31 | assert response.json() == [ 32 | {'count': 2, 'taxonomy_name': 'tax_test1'}, 33 | {'count': 1, 'taxonomy_name': 'tax_test3'} 34 | ] 35 | 36 | def test_taxonomy_stats_distant_past(self): 37 | query = { 38 | 'at_time': '2010-10-01T12:00:00' 39 | } 40 | response = client.get("/api/v1/stats/taxonomies?%s" % urlencode(query)) 41 | pprinter.pprint(response.json()) 42 | assert response.status_code == 200 43 | assert response.json() == [] 44 | 45 | def test_taxonomy_stats_filter(self): 46 | query = { 47 | 'filter': 'registrar-id=1' 48 | } 49 | response = 
client.get("/api/v1/stats/taxonomies?%s" % urlencode(query)) 50 | pprinter.pprint(response.json()) 51 | assert response.status_code == 200 52 | assert response.json() == [ 53 | {'count': 1, 'taxonomy_name': 'tax_test1'}, 54 | {'count': 1, 'taxonomy_name': 'tax_test3'} 55 | ] 56 | 57 | def test_taxonomy_stats_filter_past(self): 58 | query = { 59 | 'at_time': '2020-10-01T12:00:00', 60 | 'filter': 'registrar-id=1' 61 | } 62 | response = client.get("/api/v1/stats/taxonomies?%s" % urlencode(query)) 63 | pprinter.pprint(response.json()) 64 | assert response.status_code == 200 65 | assert response.json() == [ 66 | {'count': 1, 'taxonomy_name': 'tax_test1'}, 67 | {'count': 1, 'taxonomy_name': 'tax_test3'} 68 | ] 69 | 70 | 71 | class StatsEndpointCategoriesTest(APIWithAdditionalDBDataTest): 72 | def setUp(self): 73 | super(StatsEndpointCategoriesTest, self).setUp("tags_categories") 74 | 75 | def test_category_stats_all(self): 76 | query = {"taxonomy": "tax_test4"} 77 | response = client.get("/api/v1/stats/categories?%s" % urlencode(query)) 78 | pprinter.pprint(response.json()) 79 | assert response.status_code == 200 80 | assert response.json() == [ 81 | {'category': 'cat_2', 'count': 2}, 82 | {'category': '', 'count': 1}, 83 | {'category': 'cat_1', 'count': 1} 84 | ] 85 | 86 | def test_category_stats_past(self): 87 | query = { 88 | "taxonomy": "tax_test4", 89 | "at_time": "2020-06-01T12:00:00" 90 | } 91 | response = client.get("/api/v1/stats/categories?%s" % urlencode(query)) 92 | pprinter.pprint(response.json()) 93 | assert response.status_code == 200 94 | assert response.json() == [ 95 | {'category': 'cat_2', 'count': 2}, 96 | {'category': '', 'count': 1}, 97 | {'category': 'cat_1', 'count': 1} 98 | ] 99 | 100 | def test_category_stats_distant_past(self): 101 | query = { 102 | "taxonomy": "tax_test4", 103 | "at_time": "2010-06-01T12:00:00" 104 | } 105 | response = client.get("/api/v1/stats/categories?%s" % urlencode(query)) 106 | pprinter.pprint(response.json()) 107 | 
assert response.status_code == 200 108 | assert response.json() == [] 109 | 110 | def test_category_stats_filter(self): 111 | query = { 112 | "taxonomy": "tax_test4", 113 | "filter": "registrar-id=1" 114 | } 115 | response = client.get("/api/v1/stats/categories?%s" % urlencode(query)) 116 | pprinter.pprint(response.json()) 117 | assert response.status_code == 200 118 | assert response.json() == [ 119 | {'category': 'cat_2', 'count': 1}, 120 | {'category': '', 'count': 1}, 121 | {'category': 'cat_1', 'count': 1} 122 | ] 123 | 124 | 125 | class StatsEndpointTagsTest(APIWithAdditionalDBDataTest): 126 | def setUp(self): 127 | super(StatsEndpointTagsTest, self).setUp("tags_categories") 128 | 129 | def test_tag_stats_tax_test1_all(self): 130 | query = { 131 | "taxonomy": "tax_test1" 132 | } 133 | response = client.get("/api/v1/stats/tags?%s" % urlencode(query)) 134 | pprinter.pprint(response.json()) 135 | assert response.status_code == 200 136 | assert response.json() == [ 137 | {'count': 2, 'tag_name': 'test_tag_1_tax_1'}, 138 | {'count': 1, 'tag_name': 'test_tag_2_tax_1'} 139 | ] 140 | 141 | def test_tag_stats_tax_test1_past(self): 142 | query = { 143 | "taxonomy": "tax_test1", 144 | "at_time": "2020-06-01T12:00:00" 145 | } 146 | response = client.get("/api/v1/stats/tags?%s" % urlencode(query)) 147 | pprinter.pprint(response.json()) 148 | assert response.status_code == 200 149 | assert response.json() == [ 150 | {'count': 2, 'tag_name': 'test_tag_1_tax_1'}, 151 | {'count': 1, 'tag_name': 'test_tag_2_tax_1'}, 152 | {'count': 1, 'tag_name': 'test_tag_3_tax_1'} 153 | ] 154 | 155 | def test_tag_stats_tax_test1_distant_past(self): 156 | query = { 157 | "taxonomy": "tax_test1", 158 | "at_time": "2010-06-01T12:00:00" 159 | } 160 | response = client.get("/api/v1/stats/tags?%s" % urlencode(query)) 161 | pprinter.pprint(response.json()) 162 | assert response.status_code == 200 163 | assert response.json() == [] 164 | 165 | def test_tag_stats_tax_test1_filter(self): 166 | query = 
{ 167 | "taxonomy": "tax_test1", 168 | "filter": "registrar-id=1" 169 | } 170 | response = client.get("/api/v1/stats/tags?%s" % urlencode(query)) 171 | pprinter.pprint(response.json()) 172 | assert response.status_code == 200 173 | assert response.json() == [ 174 | {'count': 1, 'tag_name': 'test_tag_1_tax_1'}, 175 | {'count': 1, 'tag_name': 'test_tag_2_tax_1'} 176 | ] 177 | 178 | def test_tag_stats_tax_test1_past_filter(self): 179 | query = { 180 | "taxonomy": "tax_test1", 181 | "at_time": "2020-06-01T12:00:00", 182 | "filter": "registrar-id=1" 183 | } 184 | response = client.get("/api/v1/stats/tags?%s" % urlencode(query)) 185 | pprinter.pprint(response.json()) 186 | assert response.status_code == 200 187 | assert response.json() == [ 188 | {'count': 2, 'tag_name': 'test_tag_1_tax_1'}, 189 | {'count': 1, 'tag_name': 'test_tag_2_tax_1'}, 190 | {'count': 1, 'tag_name': 'test_tag_3_tax_1'} 191 | ] 192 | 193 | def test_tag_stats_cat_1(self): 194 | query = { 195 | "taxonomy": "tax_test4", 196 | "category": "cat_1" 197 | } 198 | response = client.get("/api/v1/stats/tags?%s" % urlencode(query)) 199 | pprinter.pprint(response.json()) 200 | assert response.status_code == 200 201 | assert response.json() == [ 202 | {'count': 1, 'tag_name': 'cat_1::test_tag_1_tax_4'}, 203 | {'count': 1, 'tag_name': 'cat_1::test_tag_2_tax_4'} 204 | ] 205 | 206 | def test_tag_stats_no_cat(self): 207 | query = { 208 | "taxonomy": "tax_test4", 209 | "category": "" 210 | } 211 | response = client.get("/api/v1/stats/tags?%s" % urlencode(query)) 212 | pprinter.pprint(response.json()) 213 | assert response.status_code == 200 214 | assert response.json() == [ 215 | {'count': 1, 'tag_name': 'test_tag_4_tax_4'} 216 | ] 217 | 218 | 219 | class StatsEndpointValuesTest(APIReadOnlyTest): 220 | def test_value_stats__tag_1_tax_1(self): 221 | query = { 222 | "taxonomy": "tax_test1", 223 | "tag": "test_tag_1_tax_1" 224 | } 225 | response = client.get("/api/v1/stats/values?%s" % urlencode(query)) 226 | 
pprinter.pprint(response.json()) 227 | assert response.status_code == 200 228 | assert response.json() == [{'count': 1, 'value': 'value_1_tag_1'}] 229 | 230 | def test_value_stats__tag_1_tax_1_past(self): 231 | query = { 232 | "taxonomy": "tax_test1", 233 | "tag": "test_tag_1_tax_1", 234 | "at_time": "2020-06-01T12:00:00" 235 | } 236 | response = client.get("/api/v1/stats/values?%s" % urlencode(query)) 237 | pprinter.pprint(response.json()) 238 | assert response.status_code == 200 239 | assert response.json() == [{'count': 1, 'value': 'value_1_tag_1'}] 240 | 241 | def test_value_stats__tag_1_tax_1_distant_past(self): 242 | query = { 243 | "taxonomy": "tax_test1", 244 | "tag": "test_tag_1_tax_1", 245 | "at_time": "2010-06-01T12:00:00" 246 | } 247 | response = client.get("/api/v1/stats/values?%s" % urlencode(query)) 248 | pprinter.pprint(response.json()) 249 | assert response.status_code == 200 250 | assert response.json() == [] 251 | 252 | def test_value_stats__tag_1_tax_1_filter(self): 253 | query = { 254 | "taxonomy": "tax_test1", 255 | "tag": "test_tag_1_tax_1", 256 | "filter": "registrar-id=1" 257 | } 258 | response = client.get("/api/v1/stats/values?%s" % urlencode(query)) 259 | pprinter.pprint(response.json()) 260 | assert response.status_code == 200 261 | assert response.json() == [] 262 | 263 | def test_value_stats__tag_1_tax_1_filter_past(self): 264 | query = { 265 | "taxonomy": "tax_test1", 266 | "tag": "test_tag_1_tax_1", 267 | "filter": "registrar-id=1", 268 | "at_time": "2020-06-01T12:00:00" 269 | } 270 | response = client.get("/api/v1/stats/values?%s" % urlencode(query)) 271 | pprinter.pprint(response.json()) 272 | assert response.status_code == 200 273 | assert response.json() == [{'count': 1, 'value': 'value_1_tag_1'}] 274 | 275 | def test_value_stats__tag_1_tax_1_filter2(self): 276 | query = { 277 | "taxonomy": "tax_test1", 278 | "tag": "test_tag_1_tax_1", 279 | "filter": "registrar-id=2" 280 | } 281 | response = client.get("/api/v1/stats/values?%s" 
% urlencode(query)) 282 | pprinter.pprint(response.json()) 283 | assert response.status_code == 200 284 | assert response.json() == [] 285 | -------------------------------------------------------------------------------- /tests/tests_tag2domain_api/test_test_endpoints.py: -------------------------------------------------------------------------------- 1 | from fastapi.testclient import TestClient 2 | 3 | from tag2domain_api.app.main import app 4 | 5 | from .db_test_classes import APIReadOnlyTest 6 | 7 | client = TestClient(app) 8 | 9 | 10 | class TestEndpointsTest(APIReadOnlyTest): 11 | def test_ping(self): 12 | response = client.get("/test/ping") 13 | assert response.status_code == 200 14 | assert response.json() == {"message": "Pong!"} 15 | 16 | def test_self_test(self): 17 | response = client.get("/test/self-test") 18 | assert response.status_code == 200 19 | assert response.json() == {"message": "OK"} 20 | -------------------------------------------------------------------------------- /tests/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import copy 4 | from unittest import TestCase 5 | import binascii 6 | 7 | import psycopg2 8 | import psycopg2.sql as sql 9 | from psycopg2.extensions import ( 10 | ISOLATION_LEVEL_AUTOCOMMIT, 11 | ISOLATION_LEVEL_DEFAULT 12 | ) 13 | 14 | import tests 15 | from py_tag2domain.util import parse_config 16 | from py_tag2domain.db import Psycopg2Adapter 17 | 18 | 19 | def parse_test_db_config(): 20 | DB_CONFIG_FILE = os.path.join( 21 | os.path.dirname(os.path.abspath(__file__)), 22 | "config", 23 | "db.cfg" 24 | ) 25 | 26 | return parse_config(DB_CONFIG_FILE) 27 | 28 | 29 | config, TypeIntxnTableMappings = parse_test_db_config() 30 | DB_CONNECTION = Psycopg2Adapter.to_psycopg_args(config) 31 | 32 | 33 | class PostgresReadOnlyDBTest(TestCase): 34 | @classmethod 35 | def setUpClass(cls): 36 | print("setting up class PostgresReadOnlyDBTest") 37 | 
cls.intxn_table_mappings = TypeIntxnTableMappings 38 | if DB_CONNECTION is None: 39 | cls.no_db_flag = True 40 | else: 41 | cls.no_db_flag = False 42 | if isinstance(DB_CONNECTION, str): 43 | cls.db_connection = psycopg2.connect(DB_CONNECTION) 44 | elif isinstance(DB_CONNECTION, dict): 45 | cls.db_connection = psycopg2.connect(**DB_CONNECTION) 46 | else: 47 | raise ValueError("DB_CONNECTION must be str or dict") 48 | cls.db_connection.set_session(readonly=True) 49 | cls.db_cursor = cls.db_connection.cursor() 50 | 51 | @classmethod 52 | def tearDownClass(cls): 53 | print("tearing down class PostgresReadOnlyDBTest") 54 | if not cls.no_db_flag: 55 | cls.db_connection.close() 56 | 57 | def setUp(self): 58 | print("setting up instance of PostgresReadOnlyDBTest") 59 | if self.__class__.no_db_flag: 60 | self.skipTest("no db connection specified - skipping DB tests") 61 | 62 | 63 | def load_test_db_setup(config): 64 | t2d_db_schema_path = os.path.join( 65 | "db", 66 | "00-tag2domain-db-init", 67 | "sql" 68 | ) 69 | schema_setup = [] 70 | for name in sorted(os.listdir(t2d_db_schema_path)): 71 | if not name.endswith(".sql"): 72 | continue 73 | _file_path = os.path.join(t2d_db_schema_path, name) 74 | logging.debug("loading DB schema script from %s" % _file_path) 75 | _f = open(_file_path) 76 | schema_setup.append( 77 | _f 78 | .read() 79 | .replace(":t2d_schema", config["DBSCHEMA"]) 80 | ) 81 | _f.close() 82 | 83 | # load the test DB setup script 84 | t2d_db_test_schema_path = os.path.join( 85 | os.path.dirname(tests.__file__), 86 | "db_mock_data", 87 | "basic", 88 | "tag2domain_db_test_schema.sql" 89 | ) 90 | logging.debug("loading DB schema script from %s" % t2d_db_test_schema_path) 91 | _f = open(t2d_db_test_schema_path) 92 | schema_setup.append(_f.read()) 93 | _f.close() 94 | 95 | # load the test DB glue script 96 | t2d_db_test_glue_path = os.path.join( 97 | os.path.dirname(tests.__file__), 98 | "db_mock_data", 99 | "basic", 100 | "tag2domain_db_test_glue_views.sql" 101 | 
) 102 | logging.debug("loading DB glue script from %s" % t2d_db_test_glue_path) 103 | _f = open(t2d_db_test_glue_path) 104 | schema_setup.append(_f.read()) 105 | _f.close() 106 | 107 | schema_setup = ';\n'.join(schema_setup) 108 | 109 | t2d_db_test_data_path = os.path.join( 110 | os.path.dirname(tests.__file__), 111 | "db_mock_data", 112 | "basic", 113 | "tag2domain_db_test_data.sql" 114 | ) 115 | logging.debug("loading DB data script from %s" % t2d_db_test_data_path) 116 | _f = open(t2d_db_test_data_path) 117 | data_setup = _f.read() 118 | 119 | return schema_setup, data_setup 120 | 121 | 122 | def setup_test_db( 123 | connection_args, 124 | new_db_name, 125 | base_db_conn, 126 | base_db_cursor, 127 | schema_setup, 128 | data_setup 129 | ): 130 | db_conn = None 131 | db_cursor = None 132 | try: 133 | base_db_conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) 134 | base_db_cursor.execute(sql.SQL( 135 | "CREATE DATABASE {}" 136 | ).format(sql.Identifier(new_db_name,))) 137 | base_db_conn.set_isolation_level(ISOLATION_LEVEL_DEFAULT) 138 | 139 | # Connect to the new database 140 | _db_connection = copy.deepcopy(connection_args) 141 | _db_connection["dbname"] = new_db_name 142 | db_conn = psycopg2.connect(**_db_connection) 143 | 144 | db_conn.set_session(autocommit=True) 145 | db_cursor = db_conn.cursor() 146 | 147 | # Run the setup scripts 148 | db_cursor.execute(schema_setup) 149 | db_cursor.execute(data_setup) 150 | 151 | db_conn.set_session(autocommit=False) 152 | # reset the adapter to the new database 153 | return db_conn, db_cursor 154 | except Exception: 155 | # if something goes wrong, reinstate the old DB connection 156 | # and reraise to propagate the exception to the caller. 
157 | if db_conn is not None: 158 | logging.debug("closing DB connection (exception)") 159 | db_conn.close() 160 | raise 161 | 162 | 163 | def remove_test_db(db_conn, db_name): 164 | logging.debug("removing database %s" % db_name) 165 | db_conn.set_isolation_level( 166 | ISOLATION_LEVEL_AUTOCOMMIT 167 | ) 168 | db_conn.cursor().execute(sql.SQL( 169 | "DROP DATABASE {}" 170 | ).format(sql.Identifier(db_name,))) 171 | db_conn.set_isolation_level( 172 | ISOLATION_LEVEL_DEFAULT 173 | ) 174 | 175 | 176 | class PostgresAutoDBTest(TestCase): 177 | @classmethod 178 | def setUpClass(cls): 179 | print("setting up class PostgresAutoDBTest") 180 | # load the db setup scripts 181 | cls.schema_setup, cls.data_setup = load_test_db_setup(config) 182 | 183 | cls.db_prefix = \ 184 | "tag2domain_mock_db_%s" % (binascii.b2a_hex(os.urandom(15)).decode()) 185 | print("prefixing generated DBs by %s" % cls.db_prefix) 186 | logging.debug("prefixing generated DBs by %s" % cls.db_prefix) 187 | cls.intxn_table_mappings = TypeIntxnTableMappings 188 | 189 | def setUp(self): 190 | print("setting up instance of PostgresAutoDBTest") 191 | if DB_CONNECTION is None: 192 | self.skipTest("no db connection specified - skipping DB tests") 193 | 194 | if isinstance(DB_CONNECTION, str): 195 | self.db_connection = psycopg2.connect(DB_CONNECTION) 196 | elif isinstance(DB_CONNECTION, dict): 197 | self.db_connection = psycopg2.connect(**DB_CONNECTION) 198 | else: 199 | raise ValueError("DB_CONNECTION must be str or dict") 200 | 201 | self.db_cursor = self.db_connection.cursor() 202 | 203 | self.db_name = "%s__%s" % ( 204 | self.__class__.db_prefix, 205 | self._testMethodName 206 | ) 207 | logging.debug("creating fresh database %s" % self.db_name) 208 | 209 | self.base_db_conn = self.db_connection 210 | self.base_db_cursor = self.db_cursor 211 | 212 | try: 213 | self.db_connection, self.db_cursor = \ 214 | setup_test_db( 215 | DB_CONNECTION, 216 | self.db_name, 217 | self.base_db_conn, 218 | 
self.base_db_cursor, 219 | self.__class__.schema_setup, 220 | self.__class__.data_setup 221 | ) 222 | except Exception: 223 | # reset the db connection and reraise to fail the test 224 | logging.debug("reset DB connection to base (exception)") 225 | self.db_connection = self.base_db_conn 226 | self.db_cursor = self.base_db_cursor 227 | 228 | self.base_db_conn = None 229 | self.base_db_cursor = None 230 | 231 | def tearDown(self): 232 | print("tearing down instance of PostgresAutoDBTest") 233 | # close the database connection and reset the base connection 234 | if self.base_db_conn is not None: 235 | logging.debug("close the database connection to the auto DB") 236 | self.db_connection.close() 237 | logging.debug("reset DB connection to base") 238 | self.db_connection = self.base_db_conn 239 | self.db_cursor = self.base_db_cursor 240 | 241 | # Remove the test database 242 | assert hasattr(self, 'db_name') 243 | remove_test_db(self.db_connection, self.db_name) 244 | else: 245 | logging.debug("no auto DB handle found") 246 | 247 | def loadTestCase(self, name): 248 | data_path = os.path.join( 249 | os.path.dirname(tests.__file__), 250 | "db_mock_data", 251 | "test_cases", 252 | name + ".sql" 253 | ) 254 | 255 | logging.debug("loading test case from %s" % data_path) 256 | _f = open(data_path) 257 | data_setup = _f.read() 258 | 259 | self.db_connection.set_session(autocommit=True) 260 | db_cursor = self.db_connection.cursor() 261 | 262 | # Run the setup scripts 263 | db_cursor.execute(data_setup) 264 | 265 | self.db_connection.set_session(autocommit=False) 266 | --------------------------------------------------------------------------------