();

// Patch the PureComponent type declaration so that we can access React
// internal variables. We disable eslint here because the declaration has to
// match the declaration from the @types/react package.
declare module 'react' {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  interface PureComponent<P = {}, S = {}, SS = any>
    extends React.Component<P, S, SS> {
    _reactInternalFiber: {
      key: string;
      type: {
        displayName: string;
        name: string;
      };
    };
  }
}

/**
 * This component uses the key provided to a component to generate a cache key for its data.
 * We chose to use the key for the following reasons:
 * 1. React uses the key to identify the element associated with the component.
 *    In some cases this helps it recognize that two instances are the same and avoid re-constructing the instance.
 *    This strategy is expected to help React avoid destroying a component unnecessarily.
 * 2. React does some work to avoid siblings with the same key, so it should warn when a key is reused.
 * 3. Since the key is read internally from each component, it doesn't pollute the components' props.
 */
export class PersistentComponent<
  TProps = {},
  TState = {}
> extends PureComponent<TProps, TState> {
  componentDidMount() {
    if (!this._reactInternalFiber.key) {
      console.warn(
        'When using PersistentComponent please provide the key prop'
      );
    }
    const cacheKey = this.getCacheKey();
    const previousState = cache.get(cacheKey);
    if (previousState && !shallowEqual(this.state, previousState)) {
      this.setState(previousState);
    }
  }

  componentWillUnmount() {
    const key = this.getCacheKey();
    cache.set(key, this.state);
  }

  private getCacheKey() {
    const name =
      this._reactInternalFiber.type.displayName ||
      this._reactInternalFiber.type.name;
    return `${name}-${this._reactInternalFiber.key}`;
  }
}

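// Illustrative usage only; `SearchFilters`, its state shape, and the render
// site are assumptions, not part of this module. Subclasses have their state
// cached under `<componentName>-<key>`, so a key prop must be provided where
// the component is rendered:
//
//   class SearchFilters extends PersistentComponent<{}, {query: string}> {
//     state = {query: ''};
//     render() {
//       return (
//         <input
//           value={this.state.query}
//           onChange={e => this.setState({query: e.target.value})}
//         />
//       );
//     }
//   }
//
//   // elsewhere: <SearchFilters key="search-filters" />
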
--------------------------------------------------------------------------------
/lib_core/datamart_core/augment.py:
--------------------------------------------------------------------------------
import logging
import time
import uuid

from datamart_augmentation import AugmentationError, join, union


logger = logging.getLogger(__name__)


def augment(data, newdata, metadata, task, writer, columns=None):
    """
    Augments original data based on the task.

    :param data: the data to be augmented, as a binary file object.
    :param newdata: the path to the CSV file to augment with.
    :param metadata: the metadata of the data to be augmented.
    :param task: the augmentation task.
    :param writer: Writer on which to save the files.
    :param columns: a list of column indices from newdata that will be added
        to data.
    """

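    # For illustration, a `task` dict has roughly the following shape; the
    # concrete values are made up, and only the keys read in this function are
    # taken from the code:
    #
    #     {
    #         'id': 'datamart.example.1234',
    #         'metadata': {...},
    #         'augmentation': {
    #             'type': 'join',  # or 'union'
    #             'left_columns': [[0]],
    #             'right_columns': [[2]],
    #             'agg_functions': ...,          # optional (join only)
    #             'temporal_resolution': ...,    # optional (join only)
    #         },
    #     }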
    if 'id' not in task:
        raise AugmentationError("Dataset id for the augmentation task not provided")

    # TODO: add support for combining multiple columns before an augmentation
    # e.g.: [['street number', 'street', 'city']] and [['address']]
    # currently, Datamart does not support such cases
    # this means that spatial joins (with GPS) are not supported for now

    # Perform augmentation
    start = time.perf_counter()
    if task['augmentation']['type'] == 'join':
        output_metadata = join(
            data,
            newdata,
            metadata,
            task['metadata'],
            writer,
            task['augmentation']['left_columns'],
            task['augmentation']['right_columns'],
            columns=columns,
            agg_functions=task['augmentation'].get('agg_functions'),
            temporal_resolution=task['augmentation'].get('temporal_resolution'),
        )
    elif task['augmentation']['type'] == 'union':
        output_metadata = union(
            data,
            newdata,
            metadata,
            task['metadata'],
            writer,
            task['augmentation']['left_columns'],
            task['augmentation']['right_columns'],
        )
    else:
        raise AugmentationError("Augmentation task not provided")
    logger.info("Total augmentation: %.4fs", time.perf_counter() - start)

    # Write out the metadata
    writer.set_metadata(uuid.uuid4().hex, output_metadata)
    return writer.finish()

--------------------------------------------------------------------------------
/scripts/migrate-types-and-attributes.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

"""This script updates the index for !115 and !127.

It adds the dataset "types" information (computed from column semantic types)
and the "attribute_keywords" field (computed from column names).
"""
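
# For illustration, after migration a dataset record gains fields shaped like
# the following (the values are made up, not taken from a real record):
#
#     "attribute_keywords": ["station_name", "station", "name"],
#     "types": ["numerical", "temporal"]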

import json
import os
import shutil
import sys

from datamart_profiler.core import expand_attribute_name
from datamart_profiler.profile_types import determine_dataset_type


def migrate(from_folder, to_folder):
    assert os.listdir(from_folder)
    assert not os.listdir(to_folder)

    datasets = []
    lazo = []
    for f in os.listdir(from_folder):
        if f.startswith('lazo.'):
            lazo.append(f)
        else:
            datasets.append(f)

    for i, dataset in enumerate(datasets):
        if i % 100 == 0:
            print("% 5d / %5d datasets processed" % (i, len(datasets)))

        with open(os.path.join(from_folder, dataset)) as fp:
            obj = json.load(fp)

        if 'attribute_keywords' not in obj:
            attribute_keywords = []
            for col in obj['columns']:
                attribute_keywords.append(col['name'])
                kw = list(expand_attribute_name(col['name']))
                if kw != [col['name']]:
                    attribute_keywords.extend(kw)
            obj['attribute_keywords'] = attribute_keywords

        if 'types' not in obj:
            dataset_types = set()
            for col in obj['columns']:
                type_ = determine_dataset_type(
                    col['structural_type'],
                    col['semantic_types'],
                )
                if type_:
                    dataset_types.add(type_)
            obj['types'] = sorted(dataset_types)

        with open(os.path.join(to_folder, dataset), 'w') as fp:
            json.dump(obj, fp, sort_keys=True, indent=2)

    print("Copying lazo data...")
    for i, f in enumerate(lazo):
        if i % 1000 == 0:
            print("% 5d / %5d files copied" % (i, len(lazo)))
        shutil.copy2(
            os.path.join(from_folder, f),
            os.path.join(to_folder, f),
        )


if __name__ == '__main__':
    migrate(sys.argv[1], sys.argv[2])

--------------------------------------------------------------------------------
/frontend/src/components/visus/Card/Card.tsx:
--------------------------------------------------------------------------------
import * as React from 'react';
import styled from 'styled-components';
import './card.css';

interface CardProps {
  title: string;
  className?: string;
  style?: React.CSSProperties;
}

class Card extends React.PureComponent<CardProps> {
  render() {
    const cardClassName = this.props.className
      ? 'card ' + this.props.className
      : 'card';
    return (
      <div className={cardClassName} style={this.props.style}>
        <div className="card-body">
          {this.props.title ? (
            <h5 className="card-title">{this.props.title}</h5>
          ) : (
            ''
          )}
          {this.props.children}
        </div>
      </div>
    );
  }
}


interface CardShadowProps {
  className?: string;
  height?: string;
}

class CardShadow extends React.PureComponent<CardShadowProps> {
  render() {
    const cardClassName = this.props.className
      ? 'card-hover card card-attributes ' + this.props.className
      : 'card-hover card card-attributes';
    return (
      <div className={cardClassName} style={{height: this.props.height}}>
        {this.props.children}
      </div>
    );
  }
}

interface CardAttrFieldProps {
  textAlign?: string;
  width?: string;
  fontWeight?: string;
  padding?: string;
}

const CardAttrField = styled.div<CardAttrFieldProps>`
  font-weight: ${({fontWeight}) => fontWeight || 'normal'};
  text-align: ${({textAlign}) => textAlign || 'right'};
  width: ${({width}) => width || '110px'};
  padding: ${({padding}) => padding || '0 15px'};
`;

const CardAttrValue = styled.div`
  flex: 1;
  padding-right: 15px;
  overflow-wrap: break-word;
  word-wrap: break-word;
  word-break: break-word;
`;

export const CardButton = styled.div`
  display: flex;
  justify-content: center;
  flex-direction: column;
  text-align: center;
  height: 100%;
  cursor: pointer;
`;

export {Card, CardShadow, CardAttrField, CardAttrValue};

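// Illustrative usage (the values shown and the enclosing markup are
// assumptions, not part of this module):
//
//   <CardShadow height="300px">
//     <Card title="Dataset">
//       <CardAttrField fontWeight="bold">Rows</CardAttrField>
//       <CardAttrValue>10,000</CardAttrValue>
//     </Card>
//   </CardShadow>
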
--------------------------------------------------------------------------------
/frontend/package.json:
--------------------------------------------------------------------------------
{
  "name": "frontend",
  "version": "0.1.0",
  "private": true,
  "dependencies": {
    "@hanreev/types-ol": "^2.0.8",
    "@material-ui/core": "^4.11.1",
    "@testing-library/jest-dom": "^4.2.4",
    "@testing-library/react": "^9.4.1",
    "@testing-library/user-event": "^7.2.1",
    "@types/d3-scale": "^3.2.2",
    "@types/jest": "^24.9.1",
    "@types/node": "^12.12.29",
    "@types/ol": "^5.3.7",
    "@types/react": "^16.9.23",
    "@types/react-dom": "^16.9.5",
    "@types/react-router-dom": "^5.1.3",
    "@types/react-table": "^7.0.10",
    "@types/styled-components": "^5.0.1",
    "axios": "^0.21.4",
    "d3-scale": "^3.2.4",
    "moment": "^2.24.0",
    "ol": "^5.3.3",
    "react": "^16.13.1",
    "react-datepicker": "^2.13.0",
    "react-dnd": "^10.0.2",
    "react-dnd-html5-backend": "^10.0.2",
    "react-dom": "^16.13.0",
    "react-dropzone": "^10.2.1",
    "react-feather": "^2.0.3",
    "react-router-dom": "^5.1.2",
    "react-scripts": "4.0.3",
    "react-table": "^7.0.0",
    "react-vega": "^7.3.0",
    "styled-components": "^5.0.1",
    "vega": "^5.11.1",
    "vega-lite": "^4.12.0"
  },
  "scripts": {
    "start": "react-scripts start",
    "build": "react-scripts build",
    "test": "react-scripts test",
    "eject": "react-scripts eject",
    "check": "gts check",
    "clean": "gts clean",
    "compile": "tsc -p .",
    "fix": "gts fix",
    "pretest": "npm run compile",
    "posttest": "npm run check"
  },
  "eslintConfig": {
    "extends": "react-app"
  },
  "browserslist": {
    "production": [
      ">0.2%",
      "not dead",
      "not op_mini all"
    ],
    "development": [
      "last 1 chrome version",
      "last 1 firefox version",
      "last 1 safari version"
    ]
  },
  "devDependencies": {
    "@types/node": "^12.12.29",
    "@types/react-datepicker": "^2.11.0",
    "canvas": "^2.8.0",
    "eslint-plugin-react": "^7.20.6",
    "gts": "2.0.2",
    "jest-canvas-mock": "^2.2.0",
    "typescript": "^3.9.10"
  },
  "jest": {
    "transformIgnorePatterns": [
      "/node_modules/(?!ol).+\\.js$"
    ]
  },
  "proxy": "https://auctus.vida-nyu.org/api/v1"
}

--------------------------------------------------------------------------------
/scripts/migrate-temporal-coverage.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

"""This script updates the index for !162.

It creates the 'temporal_coverage' field (in the 'datasets' index) and the
'temporal_coverage' index.
"""
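
# For illustration, each entry added to 'temporal_coverage' below has this
# shape; the 'ranges' value is whatever was stored under the column's
# 'coverage' key, and the concrete values here are made up:
#
#     {
#         'type': 'datetime',
#         'column_names': ['pickup_datetime'],
#         'column_indexes': [3],
#         'column_types': ['http://schema.org/DateTime'],
#         'ranges': [...],
#         'temporal_resolution': 'hour',  # only if present on the column
#     }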

import json
import os
import shutil
import sys


def migrate(from_folder, to_folder):
    assert os.listdir(from_folder)
    assert not os.listdir(to_folder)

    datasets = []
    lazo = []
    for f in os.listdir(from_folder):
        if f.startswith('lazo.'):
            lazo.append(f)
        else:
            datasets.append(f)

    for i, dataset in enumerate(datasets):
        if i % 100 == 0:
            print("% 5d / %5d datasets processed" % (i, len(datasets)))

        with open(os.path.join(from_folder, dataset)) as fp:
            obj = json.load(fp)

        if 'temporal_coverage' not in obj:
            temporal_coverage = []
            for idx, column in enumerate(obj['columns']):
                if 'http://schema.org/DateTime' in column['semantic_types']:
                    coverage = {
                        'type': 'datetime',
                        'column_names': [column['name']],
                        'column_indexes': [idx],
                        'column_types': ['http://schema.org/DateTime'],
                        'ranges': column.pop('coverage'),
                    }
                    column.pop('mean', None)
                    column.pop('stddev', None)
                    if 'temporal_resolution' in column:
                        coverage['temporal_resolution'] = \
                            column.pop('temporal_resolution')
                    temporal_coverage.append(coverage)

            if temporal_coverage:
                obj['temporal_coverage'] = temporal_coverage

        with open(os.path.join(to_folder, dataset), 'w') as fp:
            json.dump(obj, fp, sort_keys=True, indent=2)

    print("Copying lazo data...")
    for i, f in enumerate(lazo):
        if i % 1000 == 0:
            print("% 5d / %5d files copied" % (i, len(lazo)))
        shutil.copy2(
            os.path.join(from_folder, f),
            os.path.join(to_folder, f),
        )


if __name__ == '__main__':
    migrate(sys.argv[1], sys.argv[2])

--------------------------------------------------------------------------------
/contrib/k8s/discovery/worldbank.libsonnet:
--------------------------------------------------------------------------------
local utils = import '../utils.libsonnet';
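
// Illustrative usage from another Jsonnet file; the import path and the
// `config` object are assumptions, not defined here:
//
//   local worldbank = import 'discovery/worldbank.libsonnet';
//   worldbank(config)                          // default schedule
//   worldbank(config, schedule='0 3 * * *')    // or override the CronJob schedule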

function(
  config,
  schedule='0 1 * * 1,3,5',
) {
  'worldbank-cronjob': config.kube('batch/v1beta1', 'CronJob', {
    file:: 'discovery.yml',
    metadata: {
      name: 'worldbank',
      labels: {
        app: 'auctus',
        what: 'worldbank',
      },
    },
    spec: {
      schedule: schedule,
      jobTemplate: {
        metadata: {
          labels: {
            app: 'auctus',
            what: 'worldbank',
          },
        },
        spec: {
          template: {
            metadata: {
              labels: {
                app: 'auctus',
                what: 'worldbank',
              },
            },
            spec: {
              restartPolicy: 'Never',
              securityContext: {
                runAsNonRoot: true,
              },
              containers: [
                {
                  name: 'worldbank',
                  image: config.image,
                  imagePullPolicy: 'IfNotPresent',
                  args: ['python', '-m', 'worldbank_discovery'],
                  env: utils.env(
                    {
                      LOG_FORMAT: config.log_format,
                      ELASTICSEARCH_HOSTS: 'elasticsearch:9200',
                      ELASTICSEARCH_PREFIX: config.elasticsearch.prefix,
                      AMQP_HOST: 'rabbitmq',
                      AMQP_PORT: '5672',
                      AMQP_USER: {
                        secretKeyRef: {
                          name: 'secrets',
                          key: 'amqp.user',
                        },
                      },
                      AMQP_PASSWORD: {
                        secretKeyRef: {
                          name: 'secrets',
                          key: 'amqp.password',
                        },
                      },
                      LAZO_SERVER_HOST: 'lazo',
                      LAZO_SERVER_PORT: '50051',
                    }
                    + utils.object_store_env(config.object_store)
                  ),
                },
              ],
            },
          },
        },
      },
    },
  }),
}

--------------------------------------------------------------------------------
/contrib/k8s/discovery/uaz-indicators.libsonnet:
--------------------------------------------------------------------------------
local utils = import '../utils.libsonnet';

function(
  config,
  schedule='20 1 * * 1,3,5',
) {
  'uaz-indicators-cronjob': config.kube('batch/v1beta1', 'CronJob', {
    file:: 'discovery.yml',
    metadata: {
      name: 'uaz-indicators',
      labels: {
        app: 'auctus',
        what: 'uaz-indicators',
      },
    },
    spec: {
      schedule: schedule,
      jobTemplate: {
        metadata: {
          labels: {
            app: 'auctus',
            what: 'uaz-indicators',
          },
        },
        spec: {
          template: {
            metadata: {
              labels: {
                app: 'auctus',
                what: 'uaz-indicators',
              },
            },
            spec: {
              restartPolicy: 'Never',
              securityContext: {
                runAsNonRoot: true,
              },
              containers: [
                {
                  name: 'uaz-indicators',
                  image: config.image,
                  imagePullPolicy: 'IfNotPresent',
                  args: ['python', '-m', 'uaz_indicators'],
                  env: utils.env(
                    {
                      LOG_FORMAT: config.log_format,
                      ELASTICSEARCH_HOSTS: 'elasticsearch:9200',
                      ELASTICSEARCH_PREFIX: config.elasticsearch.prefix,
                      AMQP_HOST: 'rabbitmq',
                      AMQP_PORT: '5672',
                      AMQP_USER: {
                        secretKeyRef: {
                          name: 'secrets',
                          key: 'amqp.user',
                        },
                      },
                      AMQP_PASSWORD: {
                        secretKeyRef: {
                          name: 'secrets',
                          key: 'amqp.password',
                        },
                      },
                      LAZO_SERVER_HOST: 'lazo',
                      LAZO_SERVER_PORT: '50051',
                    }
                    + utils.object_store_env(config.object_store)
                  ),
                },
              ],
            },
          },
        },
      },
    },
  }),
}

--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[tool.poetry]
name = "auctus"
version = "0.10"
description = "Auctus system meta-package"

license = "Apache-2.0"

authors = [
    "Remi Rampin ",
]

repository = "https://gitlab.com/ViDA-NYU/auctus/auctus"
homepage = "https://auctus.vida-nyu.org/"

keywords = ["auctus", "datamart"]

classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Science/Research",
    "Operating System :: Unix",
    "Programming Language :: Python :: 3 :: Only",
    "Topic :: Scientific/Engineering :: Information Analysis",
]

packages = []

include = []

[tool.poetry.dependencies]
python = "^3.8,<3.11" # Upper bound for numpy
datamart-profiler = {path = "./lib_profiler", develop=true}
datamart-materialize = {path = "./lib_materialize", develop=true}
datamart-augmentation = {path = "./lib_augmentation", develop=true}
datamart-geo = {path = "lib_geo", develop=true}
datamart-core = {path = "./lib_core", develop=true}
datamart-fslock = {path = "./lib_fslock", develop=true}
datamart-coordinator-service = {path = "./coordinator", develop=true}
datamart-profiler-service = {path = "./profiler", develop=true}
datamart-api-service = {path = "./apiserver", develop=true}
datamart-cache-cleaner-service = {path = "cache_cleaner", develop=true}
datamart-snapshotter-service = {path = "snapshotter", develop=true}
datamart-noaa-discovery-service = {path = "./discovery/noaa", develop=true}
datamart-socrata-discovery-service = {path = "./discovery/socrata", develop=true}
datamart-zenodo-discovery-service = {path = "./discovery/zenodo", develop=true}
datamart-ckan-discovery-service = {path = "./discovery/ckan", develop=true}
datamart-uaz-indicators-service = {path = "./discovery/uaz_indicators", develop=true}
datamart-worldbank-discovery-service = {path= "./discovery/worldbank", develop=true}
datamart-isi-discovery-service = {path= "./discovery/isi", develop=true}
toml = "*"
opentelemetry-exporter-jaeger-thrift = "*"

[tool.poetry.dev-dependencies]
flake8 = "*"
PyYaml = "*"
requests = "*"
coverage = "*" # Keep it in sync with Dockerfiles for CI
jsonschema = ">=3.0,<4"
readme_renderer = "*"
Sphinx = "*"
sphinx-rtd-theme = "^0.5.0"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

--------------------------------------------------------------------------------