├── tests ├── __init__.py ├── data │ ├── invalid.bin │ ├── empty.csv │ ├── excel.xlsx │ ├── hourly_aug_days.csv │ ├── spss.sav │ ├── excel97.xls │ ├── stata114.dta │ ├── stata118.dta │ ├── basic_aug.csv │ ├── parquet.parquet │ ├── agg_aug.csv │ ├── daily_aug.csv │ ├── agg.csv │ ├── addresses.csv │ ├── admins.csv │ ├── hourly_aug.csv │ ├── years_pivoted.csv │ ├── dates_pivoted.csv │ ├── lat_longs.csv │ ├── lazo_aug.csv │ ├── geo_aug.csv │ ├── daily_aug_hours.csv │ ├── spatiotemporal_aug.csv │ ├── years_pivoted.converted.csv │ ├── basic.csv │ ├── daily.csv │ ├── basic.d3m.csv │ ├── dates_pivoted.converted.csv │ ├── lazo.csv │ ├── annotated.csv │ ├── temporal.py │ ├── hourly.csv │ ├── spatiotemporal.py │ └── geo.py ├── ci.env ├── __main__.py └── test_common.py ├── apiserver ├── apiserver │ ├── __init__.py │ ├── __main__.py │ ├── search │ │ └── base.py │ ├── enhance_metadata.py │ └── graceful_shutdown.py └── setup.py ├── coordinator ├── coordinator │ ├── __init__.py │ ├── __main__.py │ └── templates │ │ ├── errors.html │ │ ├── login.html │ │ └── base.html └── setup.py ├── snapshotter ├── snapshotter │ ├── __init__.py │ └── __main__.py └── setup.py ├── cache_cleaner ├── cache_cleaner │ ├── __init__.py │ └── __main__.py └── setup.py ├── discovery ├── noaa │ ├── noaa_discovery │ │ └── __init__.py │ └── setup.py ├── ckan │ └── setup.py ├── zenodo │ └── setup.py ├── isi │ └── setup.py ├── socrata │ └── setup.py ├── worldbank │ └── setup.py └── uaz_indicators │ └── setup.py ├── frontend ├── .eslintignore ├── src │ ├── react-app-env.d.ts │ ├── components │ │ ├── SearchResults │ │ │ ├── SearchState.ts │ │ │ ├── SearchResults.css │ │ │ ├── SimpleBar.tsx │ │ │ └── DatasetSample.css │ │ ├── RelatedFileFilter │ │ │ └── RelatedFileColumnsSelector.css │ │ ├── ui │ │ │ ├── Button │ │ │ │ ├── Button.css │ │ │ │ └── Button.tsx │ │ │ ├── Tabs │ │ │ │ ├── Tabs.css │ │ │ │ └── Tabs.tsx │ │ │ └── DropdownMenu │ │ │ │ └── DropdownMenu.tsx │ │ ├── visus │ │ │ ├── Loading │ │ │ │ ├── Loading.tsx │ │ │ │ └── Spinner.tsx │ │ │ ├── Card │ │ │ │ ├── card.css │ │ │ │ └── Card.tsx │ │ │ └── PersistentComponent │ │ │ │ └── PersistentComponent.tsx │ │ ├── AdvancedSearchBar │ │ │ └── AdvancedSearchBar.css │ │ ├── MainMenu │ │ │ ├── MainMenu.css │ │ │ └── MainMenu.tsx │ │ ├── SearchBar │ │ │ ├── SearchBar.css │ │ │ └── SearchBar.tsx │ │ ├── Logo │ │ │ ├── Logo.css │ │ │ ├── Logo.tsx │ │ │ └── auctus-logo.min.svg │ │ ├── Badges │ │ │ ├── Badges.css │ │ │ └── IconAbc.tsx │ │ ├── Upload │ │ │ └── Upload.css │ │ ├── FilterContainer │ │ │ └── FilterContainer.tsx │ │ ├── GeoSpatialCoverageMap │ │ │ └── GeoSpatialCoverageMap.css │ │ ├── Chip │ │ │ ├── Chip.tsx │ │ │ └── Chip.css │ │ ├── DateFilter │ │ │ └── DateFilter.css │ │ └── JoinColumnsSelector │ │ │ └── FunctionBin.tsx │ ├── setupTests.ts │ ├── index.tsx │ ├── App.test.tsx │ ├── index.css │ ├── spatial-utils.ts │ ├── App.tsx │ └── config.ts ├── .prettierrc.js ├── public │ ├── favicon.ico │ ├── robots.txt │ ├── manifest.json │ └── index.html ├── .gitignore ├── nginx.conf ├── tsconfig.json ├── Dockerfile ├── .eslintrc.json ├── README.md └── package.json ├── lib_fslock ├── MANIFEST.in ├── datamart_fslock │ └── __init__.py ├── LICENSE.txt └── setup.py ├── docker ├── ckan.json ├── zenodo.json ├── coveragerc ├── redis.conf ├── socrata.json ├── etc_rabbitmq │ ├── enabled_plugins │ └── rabbitmq.conf ├── haproxy.dockerfile ├── rabbitmq.dockerfile ├── nominatim.dockerfile ├── haproxy.conf ├── prometheus.yml ├── grafana.dockerfile └── install_deps.py ├── lib_core ├── MANIFEST.in ├── 
NOTICE.txt ├── datamart_core │ ├── __init__.py │ ├── prom.py │ ├── types.py │ └── augment.py └── setup.py ├── docs ├── amqp.png ├── architecture.png ├── screenshots │ ├── join.png │ ├── menu.png │ ├── union.png │ ├── results.png │ ├── search.png │ ├── upload.png │ ├── statistics.png │ ├── column-view.png │ ├── filter-source.png │ ├── filter-spatial.png │ ├── filter-temporal.png │ └── filter-related-file.png ├── python │ ├── index.rst │ ├── datamart-augmentation.rst │ ├── datamart-rest.rst │ └── datamart-profiler.rst ├── redoc │ └── index.html ├── schemas.rst ├── Makefile ├── index.rst ├── make.bat └── conf.py ├── lib_augmentation ├── MANIFEST.in ├── NOTICE.txt ├── datamart_augmentation │ └── __init__.py ├── README.rst └── setup.py ├── lib_materialize ├── MANIFEST.in ├── NOTICE.txt ├── datamart_materialize │ ├── spss.py │ ├── stata.py │ ├── tsv.py │ ├── parquet.py │ ├── common.py │ ├── excel.py │ ├── excel97.py │ ├── types.py │ ├── pivot.py │ └── utils.py └── README.rst ├── lib_profiler ├── MANIFEST.in ├── NOTICE.txt ├── datamart_profiler │ ├── __init__.py │ ├── warning_tools.py │ └── types.py ├── README.rst └── setup.py ├── .gitmodules ├── NOTICE.txt ├── scripts ├── load_env.sh ├── docker_clear_caches.sh ├── docker_save_uploads.sh ├── run_frontend_tests.sh ├── docker_purge_source.sh ├── upload_dataset.sh ├── docker_export_all.sh ├── setup.sh ├── docker_import_all.sh ├── docker_build_push.sh ├── docker_import_snapshot.sh ├── delete_dataset.py ├── report-uploads.sh ├── list_big_datasets.py ├── list_sources.py ├── clear_caches.py ├── canonicalize_yaml.py ├── purge_source.py ├── migrate-source-url.py ├── migrate-point-format.py ├── README.md ├── reprocess_all.py ├── export_all.py ├── migrate-types-and-attributes.py └── migrate-temporal-coverage.py ├── contrib └── k8s │ ├── Makefile │ ├── README.md │ ├── secrets.jsonnet │ ├── snapshotter.libsonnet │ ├── auctus.libsonnet │ └── discovery │ ├── worldbank.libsonnet │ └── uaz-indicators.libsonnet ├── .gitignore ├── .dockerignore ├── env.default ├── profiler └── setup.py └── pyproject.toml /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/data/invalid.bin: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /apiserver/apiserver/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /coordinator/coordinator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /snapshotter/snapshotter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cache_cleaner/cache_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /discovery/noaa/noaa_discovery/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /frontend/.eslintignore: 
-------------------------------------------------------------------------------- 1 | **/node_modules 2 | build/ -------------------------------------------------------------------------------- /lib_fslock/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.txt 2 | -------------------------------------------------------------------------------- /tests/data/empty.csv: -------------------------------------------------------------------------------- 1 | important features,not here 2 | -------------------------------------------------------------------------------- /docker/ckan.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"url": "data.humdata.org"} 3 | ] 4 | -------------------------------------------------------------------------------- /docker/zenodo.json: -------------------------------------------------------------------------------- 1 | { 2 | "keyword_query": "covid" 3 | } 4 | -------------------------------------------------------------------------------- /lib_core/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.txt 2 | include NOTICE.txt 3 | -------------------------------------------------------------------------------- /docker/coveragerc: -------------------------------------------------------------------------------- 1 | [paths] 2 | source = 3 | . 4 | /usr/src/app 5 | -------------------------------------------------------------------------------- /docker/redis.conf: -------------------------------------------------------------------------------- 1 | maxmemory 500mb 2 | maxmemory-policy allkeys-lru 3 | -------------------------------------------------------------------------------- /docs/amqp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/docs/amqp.png -------------------------------------------------------------------------------- /frontend/src/react-app-env.d.ts: -------------------------------------------------------------------------------- 1 | /// <reference types="react-app" /> 2 | -------------------------------------------------------------------------------- /frontend/.prettierrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | ...require('gts/.prettierrc.json') 3 | } -------------------------------------------------------------------------------- /docs/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/docs/architecture.png -------------------------------------------------------------------------------- /tests/data/excel.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/tests/data/excel.xlsx -------------------------------------------------------------------------------- /tests/data/hourly_aug_days.csv: -------------------------------------------------------------------------------- 1 | orig_date,color 2 | 2019-06-12,pink 3 | 2019-06-13,grey 4 | -------------------------------------------------------------------------------- /tests/data/spss.sav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/tests/data/spss.sav --------------------------------------------------------------------------------
/lib_augmentation/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.txt 2 | include NOTICE.txt 3 | include README.rst 4 | -------------------------------------------------------------------------------- /lib_materialize/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.txt 2 | include NOTICE.txt 3 | include README.rst 4 | -------------------------------------------------------------------------------- /lib_profiler/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.txt 2 | include NOTICE.txt 3 | include README.rst 4 | -------------------------------------------------------------------------------- /tests/data/excel97.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/tests/data/excel97.xls -------------------------------------------------------------------------------- /tests/data/stata114.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/tests/data/stata114.dta -------------------------------------------------------------------------------- /tests/data/stata118.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/tests/data/stata118.dta -------------------------------------------------------------------------------- /docs/screenshots/join.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/docs/screenshots/join.png -------------------------------------------------------------------------------- /docs/screenshots/menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/docs/screenshots/menu.png -------------------------------------------------------------------------------- /docs/screenshots/union.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/docs/screenshots/union.png -------------------------------------------------------------------------------- /tests/data/basic_aug.csv: -------------------------------------------------------------------------------- 1 | number,desk_faces 2 | 5,west 3 | 4,south 4 | 7,west 5 | 6,east 6 | 11, 7 | -------------------------------------------------------------------------------- /tests/data/parquet.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/tests/data/parquet.parquet -------------------------------------------------------------------------------- /docs/screenshots/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/docs/screenshots/results.png -------------------------------------------------------------------------------- /docs/screenshots/search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/docs/screenshots/search.png -------------------------------------------------------------------------------- /docs/screenshots/upload.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/docs/screenshots/upload.png -------------------------------------------------------------------------------- /frontend/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/frontend/public/favicon.ico -------------------------------------------------------------------------------- /frontend/public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: / 4 | -------------------------------------------------------------------------------- /docker/socrata.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"url": "data.cityofnewyork.us"}, 3 | {"url": "finances.worldbank.org"} 4 | ] 5 | -------------------------------------------------------------------------------- /docs/screenshots/statistics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/docs/screenshots/statistics.png -------------------------------------------------------------------------------- /tests/data/agg_aug.csv: -------------------------------------------------------------------------------- 1 | id,location 2 | 40,brazil 3 | 30,south korea 4 | 70,usa 5 | 80,canada 6 | 100,france 7 | -------------------------------------------------------------------------------- /docs/screenshots/column-view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/docs/screenshots/column-view.png -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "lib_geo"] 2 | path = lib_geo 3 | url = https://gitlab.com/ViDA-NYU/auctus/datamart-geo.git 4 | -------------------------------------------------------------------------------- /apiserver/apiserver/__main__.py: -------------------------------------------------------------------------------- 1 | from apiserver.main import main 2 | 3 | 4 | if __name__ == '__main__': 5 | main() 6 | -------------------------------------------------------------------------------- /docs/screenshots/filter-source.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/docs/screenshots/filter-source.png -------------------------------------------------------------------------------- /docs/screenshots/filter-spatial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/docs/screenshots/filter-spatial.png -------------------------------------------------------------------------------- /docs/screenshots/filter-temporal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/docs/screenshots/filter-temporal.png -------------------------------------------------------------------------------- /coordinator/coordinator/__main__.py: -------------------------------------------------------------------------------- 1 | from coordinator.web import main 2 | 3 | 4 | if __name__ == 
'__main__': 5 | main() 6 | -------------------------------------------------------------------------------- /docker/etc_rabbitmq/enabled_plugins: -------------------------------------------------------------------------------- 1 | [rabbitmq_management,rabbitmq_prometheus,rabbitmq_shovel,rabbitmq_shovel_management]. 2 | -------------------------------------------------------------------------------- /docs/screenshots/filter-related-file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/auctus/HEAD/docs/screenshots/filter-related-file.png -------------------------------------------------------------------------------- /snapshotter/snapshotter/__main__.py: -------------------------------------------------------------------------------- 1 | from snapshotter.snapshot import main 2 | 3 | 4 | if __name__ == '__main__': 5 | main() 6 | -------------------------------------------------------------------------------- /cache_cleaner/cache_cleaner/__main__.py: -------------------------------------------------------------------------------- 1 | from cache_cleaner.cache import main 2 | 3 | 4 | if __name__ == '__main__': 5 | main() 6 | -------------------------------------------------------------------------------- /tests/data/daily_aug.csv: -------------------------------------------------------------------------------- 1 | orig_date,n_people 2 | 2019-04-28,3 3 | 2019-04-29,5 4 | 2019-04-30,0 5 | 2019-05-01,1 6 | 2019-05-02,3 7 | 2019-05-03,2 8 | -------------------------------------------------------------------------------- /tests/data/agg.csv: -------------------------------------------------------------------------------- 1 | id,work,salary 2 | 40,false, 3 | 30,true,200 4 | 70,true, 5 | 80,true,200 6 | 100,false,300 7 | 100,true,200 8 | 30,false,100 9 | 70,false,600 -------------------------------------------------------------------------------- /docker/haproxy.dockerfile: -------------------------------------------------------------------------------- 1 | FROM haproxy:2.4 2 | 3 | USER root 4 | RUN apt-get update && apt-get install -y curl && \ 5 | rm -rf /var/lib/apt/lists/* 6 | USER haproxy 7 | -------------------------------------------------------------------------------- /lib_fslock/datamart_fslock/__init__.py: -------------------------------------------------------------------------------- 1 | from .unix import FSLockExclusive, FSLockShared 2 | 3 | 4 | __all__ = ['FSLockExclusive', 'FSLockShared'] 5 | __version__ = '2.1' 6 | -------------------------------------------------------------------------------- /tests/data/addresses.csv: -------------------------------------------------------------------------------- 1 | place,loc 2 | Bobst,"70 Washington Square S, New York, NY 10012" 3 | Tandon,"6 MetroTech, Brooklyn, NY 11201" 4 | WWH,"251 Mercer St, New York, NY 10012" 5 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | Datamart (codename Auctus) 2 | 3 | Copyright 2018 New York University 4 | 5 | This product includes software developed at 6 | New York University 7 | https://www.nyu.edu/ 8 | -------------------------------------------------------------------------------- /tests/data/admins.csv: -------------------------------------------------------------------------------- 1 | zero,one,mixed 2 | italy,Brittany,france 3 | italy,Normandie,Normandie 4 | spain,region occitanie,germany 5 | 
germany,Bavaria,Bavaria 6 | germany,Hamburg,Brittany 7 | -------------------------------------------------------------------------------- /frontend/src/components/SearchResults/SearchState.ts: -------------------------------------------------------------------------------- 1 | enum SearchState { 2 | CLEAN, 3 | SEARCH_REQUESTING, 4 | SEARCH_SUCCESS, 5 | SEARCH_FAILED, 6 | } 7 | 8 | export {SearchState}; 9 | -------------------------------------------------------------------------------- /lib_core/NOTICE.txt: -------------------------------------------------------------------------------- 1 | Datamart core library 2 | 3 | Copyright 2018 New York University 4 | 5 | This product includes software developed at 6 | New York University 7 | https://www.nyu.edu/ 8 | -------------------------------------------------------------------------------- /tests/data/hourly_aug.csv: -------------------------------------------------------------------------------- 1 | orig_date,color 2 | 2019-06-13T01:00:00,yellow 3 | 2019-06-13T02:00:00,yellow 4 | 2019-06-13T03:00:00,brown 5 | 2019-06-13T04:00:00,brown 6 | 2019-06-13T05:00:00,yellow 7 | -------------------------------------------------------------------------------- /lib_profiler/NOTICE.txt: -------------------------------------------------------------------------------- 1 | Datamart profiler library 2 | 3 | Copyright 2018 New York University 4 | 5 | This product includes software developed at 6 | New York University 7 | https://www.nyu.edu/ 8 | -------------------------------------------------------------------------------- /tests/data/years_pivoted.csv: -------------------------------------------------------------------------------- 1 | color,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017 2 | green,yes,no,no,yes,no,yes,yes,yes,yes,no,no,yes 3 | red,no,yes,yes,yes,no,no,no,yes,no,yes,yes,no 4 | -------------------------------------------------------------------------------- /lib_augmentation/NOTICE.txt: -------------------------------------------------------------------------------- 1 | Datamart augmentation library 2 | 3 | Copyright 2018 New York University 4 | 5 | This product includes software developed at 6 | New York University 7 | https://www.nyu.edu/ 8 | -------------------------------------------------------------------------------- /lib_augmentation/datamart_augmentation/__init__.py: -------------------------------------------------------------------------------- 1 | from .augmentation import AugmentationError, join, union 2 | 3 | 4 | __version__ = '0.10' 5 | 6 | 7 | __all__ = ['AugmentationError', 'join', 'union'] 8 | -------------------------------------------------------------------------------- /lib_materialize/NOTICE.txt: -------------------------------------------------------------------------------- 1 | Datamart materialization library 2 | 3 | Copyright 2018 New York University 4 | 5 | This product includes software developed at 6 | New York University 7 | https://www.nyu.edu/ 8 | -------------------------------------------------------------------------------- /docker/rabbitmq.dockerfile: -------------------------------------------------------------------------------- 1 | FROM rabbitmq:3.8.11-management 2 | 3 | COPY --chown=999:999 etc_rabbitmq/rabbitmq.conf /etc/rabbitmq/rabbitmq.conf 4 | COPY etc_rabbitmq/enabled_plugins /etc/rabbitmq/enabled_plugins 5 | -------------------------------------------------------------------------------- /lib_profiler/datamart_profiler/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .core import count_rows_to_skip, process_dataset 2 | from .temporal import parse_date 3 | 4 | 5 | __version__ = '0.11' 6 | 7 | 8 | __all__ = ['count_rows_to_skip', 'process_dataset', 'parse_date'] 9 | -------------------------------------------------------------------------------- /docs/python/index.rst: -------------------------------------------------------------------------------- 1 | Python libraries 2 | ================ 3 | 4 | Some components 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | datamart-rest 10 | datamart-profiler 11 | datamart-materialize 12 | datamart-augmentation 13 | -------------------------------------------------------------------------------- /frontend/src/components/RelatedFileFilter/RelatedFileColumnsSelector.css: -------------------------------------------------------------------------------- 1 | .label-button { 2 | cursor: pointer; 3 | text-decoration: underline; 4 | background: transparent; 5 | border: 0; 6 | } 7 | .danger { 8 | color: #f44336; 9 | } 10 | -------------------------------------------------------------------------------- /scripts/load_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | while read line; do 4 | if [ "$line" != "" -a "${line:0:1}" != "#" ]; then 5 | export "$line" 6 | fi 7 | done <.env 8 | export DATAMART_VERSION=$(git describe) 9 | export DATAMART_GEO_DATA="$(pwd)/lib_geo/data" 10 | -------------------------------------------------------------------------------- /tests/data/dates_pivoted.csv: -------------------------------------------------------------------------------- 1 | color,2012-01-01,2012-02-01,2012-03-01,2012-04-01,2012-05-01,2012-06-01,2012-07-01,2012-08-01,2012-09-01,2012-10-01,2012-11-01,2012-12-01 2 | green,yes,no,no,yes,no,yes,yes,yes,yes,no,no,yes 3 | red,no,yes,yes,yes,no,no,no,yes,no,yes,yes,no 4 | -------------------------------------------------------------------------------- /scripts/docker_clear_caches.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eu 3 | cd "$(dirname "$(dirname "$0")")" 4 | PROJ="$(basename "$(pwd)")" 5 | docker run -ti --rm --network ${PROJ}_default -v $PWD/scripts:/scripts -v $PWD/volumes/cache:/cache auctus python /scripts/clear_caches.py "$@" 6 | -------------------------------------------------------------------------------- /scripts/docker_save_uploads.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eu 3 | cd "$(dirname "$(dirname "$0")")" 4 | PROJ="$(basename "$(pwd)")" 5 | docker run -ti --rm --network ${PROJ}_default -v $PWD/volumes/datasets:/datasets auctus sh -c 'tar zc /datasets/datamart.upload.*' >uploads.tar.gz 6 | -------------------------------------------------------------------------------- /contrib/k8s/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all clean generate 2 | 3 | all: clean generate 4 | 5 | clean: 6 | rm -rf yaml 7 | 8 | generate: *.jsonnet *.libsonnet discovery/*.libsonnet 9 | mkdir -p yaml 10 | jsonnet -S -m yaml deployment.jsonnet 11 | jsonnet -S -m yaml secrets.jsonnet 12 | -------------------------------------------------------------------------------- /scripts/run_frontend_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd "$(dirname "$(dirname "$0")")" 4 | 5 | set -eu 
6 | 7 | # Run frontend tests 8 | docker build -t auctus_frontend_npm -f frontend/Dockerfile --target build . 9 | docker run -ti --name auctus_npm_test --rm auctus_frontend_npm sh -c "CI=true npm run test" 10 | -------------------------------------------------------------------------------- /frontend/src/components/ui/Button/Button.css: -------------------------------------------------------------------------------- 1 | .button-group > * { 2 | display: inline-flex; 3 | margin-right: 0.25rem; 4 | margin-bottom: 0.075rem; 5 | margin-top: 0.075rem; 6 | } 7 | 8 | .button-group > *:last-child { 9 | margin-right: 0px; 10 | } 11 | 12 | .btn > .feather:first-child { 13 | margin-right: 0.2rem; 14 | } 15 | -------------------------------------------------------------------------------- /tests/data/lat_longs.csv: -------------------------------------------------------------------------------- 1 | from latitude,to long,to lat,from longitude,unpaired lat 2 | 40.734746,-74.000077,40.728026,-73.998869,40.728278 3 | 40.726640,-73.993186,40.732466,-74.004689,40.722948 4 | 40.735108,-73.996996,40.727577,-74.002853,40.730824 5 | 40.729115,-74.001726,40.734259,-73.996833,40.723674 6 | 40.728896,-73.998542,40.728711,-74.002426,40.733272 7 | -------------------------------------------------------------------------------- /tests/data/lazo_aug.csv: -------------------------------------------------------------------------------- 1 | favorite 2 | Peanut Butter 3 | Ice cream 4 | flan 5 | orange 6 | kiwi 7 | coconut 8 | liquorICE 9 | MACaron 10 | pear 11 | CANDY 12 | pudding 13 | doughnut 14 | marzipan 15 | tart 16 | pecan pie 17 | souffle 18 | Pastry 19 | banana 20 | caramel 21 | milkshake 22 | Chocolate 23 | tiramisu 24 | tres leches 25 | calisson 26 | taffy 27 | lemon 28 | -------------------------------------------------------------------------------- /contrib/k8s/README.md: -------------------------------------------------------------------------------- 1 | # How to configure 2 | 3 | We use [Jsonnet](https://jsonnet.org/) to automate the generation of the YAML config files for Kubernetes. 
4 | 5 | You should only need to update `deployment.jsonnet` and `secrets.jsonnet`, then you can generate the YAML files using: 6 | ``` 7 | mkdir yaml 8 | jsonnet -S -m yaml/ deployment.jsonnet 9 | jsonnet -S -m yaml/ secrets.jsonnet 10 | ``` 11 | -------------------------------------------------------------------------------- /lib_core/datamart_core/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import prometheus_client 3 | 4 | from .discovery import Discoverer, AsyncDiscoverer 5 | 6 | 7 | __all__ = ['Discoverer', 'AsyncDiscoverer'] 8 | 9 | 10 | PROM_VERSION = prometheus_client.Gauge('version', "Datamart version", 11 | ['version']) 12 | PROM_VERSION.labels(os.environ['DATAMART_VERSION']).set(1) 13 | -------------------------------------------------------------------------------- /scripts/docker_purge_source.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eu 3 | cd "$(dirname "$(dirname "$0")")" 4 | PROJ="$(basename "$(pwd)")" 5 | docker run -ti --rm --network ${PROJ}_default -v $PWD/scripts:/scripts -v $PWD/volumes/cache:/cache -e ELASTICSEARCH_HOSTS=elasticsearch:9200 -e ELASTICSEARCH_PREFIX=${ELASTICSEARCH_PREFIX} -e LAZO_SERVER_HOST=lazo -e LAZO_SERVER_PORT=50051 auctus python /scripts/purge_source.py "$1" 6 | -------------------------------------------------------------------------------- /tests/data/geo_aug.csv: -------------------------------------------------------------------------------- 1 | lat,long,id,letter 2 | 40.731191,-74.002677,place100,a 3 | 40.728870,-73.999367,place101,b 4 | 40.737170,-73.999883,place102,c 5 | 40.729107,-73.996659,place103,d 6 | 40.730194,-74.004258,place104,e 7 | 40.734620,-74.001027,place105,f 8 | 40.727132,-73.994823,place106,g 9 | 40.728087,-73.992458,place107,h 10 | 40.730429,-74.003744,place108,i 11 | 40.728873,-73.996520,place109,j 12 | -------------------------------------------------------------------------------- /tests/data/daily_aug_hours.csv: -------------------------------------------------------------------------------- 1 | orig_date,n_people 2 | 2019-04-25T21:00:00Z,3 3 | 2019-04-26T01:00:00Z,5 4 | 2019-04-26T05:00:00Z,6 5 | 2019-04-26T09:00:00Z,7 6 | 2019-04-26T13:00:00Z,6 7 | 2019-04-26T17:00:00Z,8 8 | 2019-04-26T21:00:00Z,7 9 | 2019-04-27T01:00:00Z,0 10 | 2019-04-27T05:00:00Z,1 11 | 2019-04-27T09:00:00Z,0 12 | 2019-04-27T13:00:00Z,3 13 | 2019-04-27T17:00:00Z,0 14 | 2019-04-27T13:00:00Z,0 15 | -------------------------------------------------------------------------------- /scripts/upload_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # This script profiles a dataset and adds it to the index 4 | 5 | set -eu 6 | 7 | if [ "$#" != 3 ]; then 8 | echo "Usage: upload_dataset.sh \"\" \"\"" >&2 9 | exit 2 10 | fi 11 | exec curl -X POST \ 12 | -F "file=@$1;filename=$(basename "$1")" -F "name=$2" -F "description=$3" \ 13 | http://localhost:8002/api/v1/upload 14 | -------------------------------------------------------------------------------- /frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # production 12 | /build 13 | 14 | # misc 15 | .DS_Store 16 | .env.local 17 | .env.development.local 18 | .env.test.local 19 | .env.production.local 20 | 21 | npm-debug.log* 22 | yarn-debug.log* 23 | yarn-error.log* 24 | -------------------------------------------------------------------------------- /frontend/src/components/ui/Tabs/Tabs.css: -------------------------------------------------------------------------------- 1 | .nav-tabs .nav-item button { 2 | cursor: pointer; 3 | background-color: transparent; 4 | color: #63508b; 5 | } 6 | 7 | .nav-tabs .nav-item button { 8 | cursor: pointer; 9 | background-color: transparent; 10 | color: #63508b; 11 | } 12 | 13 | .nav-tabs .nav-item button:focus { 14 | outline: none; 15 | } 16 | 17 | .nav-tabs .nav-item .nav-link.active { 18 | color: #212529; 19 | } -------------------------------------------------------------------------------- /frontend/src/components/visus/Loading/Loading.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import {Spinner} from './Spinner'; 3 | 4 | interface LoadingProps { 5 | message?: string; 6 | } 7 | 8 | class Loading extends React.PureComponent<LoadingProps> { 9 | render() { 10 | const msg = this.props.message || 'Loading...'; 11 | return ( 12 | <div> 13 | <Spinner /> {msg} 14 | </div> 15 | ); 16 | } 17 | } 18 | 19 | export {Loading}; 20 | -------------------------------------------------------------------------------- /scripts/docker_export_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eu 3 | cd "$(dirname "$(dirname "$0")")" 4 | PROJ="$(basename "$(pwd)")" 5 | if [ -z "$1" ]; then 6 | echo "Missing argument" >&2 7 | exit 1 8 | fi 9 | mkdir "$1" 10 | chown 998 "$1" 11 | docker run --rm --network ${PROJ}_default -v $PWD/scripts:/scripts -v "$1:/index" -w /index -e ELASTICSEARCH_HOSTS=elasticsearch:9200 -e ELASTICSEARCH_PREFIX=${ELASTICSEARCH_PREFIX} auctus python /scripts/export_all.py 12 | -------------------------------------------------------------------------------- /frontend/src/setupTests.ts: -------------------------------------------------------------------------------- 1 | // jest-dom adds custom jest matchers for asserting on DOM nodes. 2 | // allows you to do things like: 3 | // expect(element).toHaveTextContent(/react/i) 4 | // learn more: https://github.com/testing-library/jest-dom 5 | import '@testing-library/jest-dom/extend-expect'; 6 | 7 | // Setup Jest canvas mock.
This is required to test components that use canvas 8 | // (e.g., Open Layers library requires this) 9 | import 'jest-canvas-mock'; 10 | -------------------------------------------------------------------------------- /frontend/src/components/SearchResults/SearchResults.css: -------------------------------------------------------------------------------- 1 | .column-infobox { 2 | width: 60%; 3 | padding-right: .25rem; 4 | padding-left: .5rem; 5 | } 6 | 7 | .column-infobox > * { 8 | overflow: auto; 9 | } 10 | 11 | .column-search-hits { 12 | width: 40%; 13 | min-width: 200px; 14 | padding-left: .5rem; 15 | padding-right: .4rem; 16 | } 17 | 18 | .search-hits-group > * { 19 | margin-bottom: .5rem; 20 | } 21 | 22 | .search-hits-group > :last-child { 23 | margin-bottom: 0; 24 | } -------------------------------------------------------------------------------- /tests/data/spatiotemporal_aug.csv: -------------------------------------------------------------------------------- 1 | date,latitude,longitude 2 | 2006-06-20T06:00:00,43.237,6.072 3 | 2006-06-20T06:00:00,43.238,6.072 4 | 2006-06-20T06:00:00,43.237,6.073 5 | 2006-06-20T06:00:00,43.238,6.073 6 | 2006-06-20T07:00:00,43.237,6.072 7 | 2006-06-20T07:00:00,43.238,6.072 8 | 2006-06-20T07:00:00,43.237,6.073 9 | 2006-06-20T07:00:00,43.238,6.073 10 | 2006-06-20T08:00:00,43.237,6.072 11 | 2006-06-20T08:00:00,43.238,6.072 12 | 2006-06-20T08:00:00,43.237,6.073 13 | 2006-06-20T08:00:00,43.238,6.073 14 | -------------------------------------------------------------------------------- /tests/data/years_pivoted.converted.csv: -------------------------------------------------------------------------------- 1 | color,year,value 2 | green,2006,yes 3 | green,2007,no 4 | green,2008,no 5 | green,2009,yes 6 | green,2010,no 7 | green,2011,yes 8 | green,2012,yes 9 | green,2013,yes 10 | green,2014,yes 11 | green,2015,no 12 | green,2016,no 13 | green,2017,yes 14 | red,2006,no 15 | red,2007,yes 16 | red,2008,yes 17 | red,2009,yes 18 | red,2010,no 19 | red,2011,no 20 | red,2012,no 21 | red,2013,yes 22 | red,2014,no 23 | red,2015,yes 24 | red,2016,yes 25 | red,2017,no 26 | -------------------------------------------------------------------------------- /tests/data/basic.csv: -------------------------------------------------------------------------------- 1 | name,color,number,what 2 | james,green,5,false 3 | john,blue,4,false 4 | robert,blue,6,false 5 | michael,blue,7,true 6 | william,blue,7,true 7 | david,green,5,false 8 | richard,green,7,true 9 | joseph,blue,6,true 10 | thomas,blue,6,false 11 | charles,blue,7,false 12 | christopher,green,11,true 13 | daniel,blue,5,false 14 | matthew,green,7,true 15 | anthony,green,7,true 16 | donald,blue,6,true 17 | mark,blue,4,false 18 | paul,blue,4,false 19 | steven,blue,6,false 20 | andrew,green,6,false 21 | kenneth,green,7,true 22 | -------------------------------------------------------------------------------- /frontend/nginx.conf: -------------------------------------------------------------------------------- 1 | server { 2 | # HTTP redirects to HTTPS 3 | listen 80; 4 | listen [::]:80; 5 | 6 | server_name _; 7 | 8 | root /var/www/html; 9 | 10 | charset utf-8; 11 | 12 | location /static/ { 13 | root /var/www/html; 14 | autoindex off; 15 | if ($query_string) { 16 | expires max; 17 | } 18 | } 19 | 20 | location /.well-known/ { 21 | try_files $uri =404; 22 | } 23 | 24 | location / { 25 | try_files $uri /index.html; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- 
/tests/data/daily.csv: -------------------------------------------------------------------------------- 1 | aug_date,rain 2 | 20190423,no 3 | 20190424,no 4 | 20190425,yes 5 | 20190426,no 6 | 20190427,yes 7 | 20190428,yes 8 | 20190429,yes 9 | 20190430,yes 10 | 20190501,no 11 | 20190502,no 12 | 20190503,yes 13 | 20190504,no 14 | 20190505,yes 15 | 20190506,yes 16 | 20190507,no 17 | 20190508,yes 18 | 20190509,yes 19 | 20190510,no 20 | 20190511,no 21 | 20190512,yes 22 | 20190513,no 23 | 20190514,no 24 | 20190515,no 25 | 20190516,no 26 | 20190517,yes 27 | 20190518,no 28 | 20190519,yes 29 | 20190520,no 30 | 20190521,no 31 | 20190522,yes 32 | -------------------------------------------------------------------------------- /lib_materialize/datamart_materialize/spss.py: -------------------------------------------------------------------------------- 1 | import pyreadstat 2 | 3 | from datamart_materialize.utils import SimpleConverter 4 | 5 | 6 | def spss_to_csv(source_filename, dest_fileobj): 7 | df, meta = pyreadstat.read_sav(source_filename) 8 | df.to_csv( 9 | dest_fileobj, 10 | float_format='%g', 11 | index=False, 12 | line_terminator='\r\n', 13 | ) 14 | 15 | 16 | class SpssConverter(SimpleConverter): 17 | """Adapter converting an SPSS file to CSV. 18 | """ 19 | transform = staticmethod(spss_to_csv) 20 | -------------------------------------------------------------------------------- /scripts/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -eux 4 | 5 | sudo(){ 6 | if [ $(id -u) = 0 ]; then 7 | "$@" 8 | else 9 | command sudo "$@" 10 | fi 11 | } 12 | 13 | # Set up volume permissions 14 | mkdir -p volumes/datasets && sudo chown -R 998 volumes/datasets 15 | mkdir -p volumes/cache && sudo chown -R 998 volumes/cache 16 | mkdir -p volumes/prometheus && sudo chown -R 65534:65534 volumes/prometheus 17 | mkdir -p volumes/elasticsearch && sudo chown -R 1000:0 volumes/elasticsearch 18 | mkdir -p volumes/grafana && sudo chown -R 472:472 volumes/grafana 19 | -------------------------------------------------------------------------------- /frontend/src/index.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom'; 3 | import './index.css'; 4 | import './bootstrap4-nyu-d3m.min.css'; 5 | 6 | import {App} from './App'; 7 | import * as serviceWorker from './serviceWorker'; 8 | 9 | ReactDOM.render(<App />, document.getElementById('root')); 10 | 11 | // If you want your app to work offline and load faster, you can change 12 | // unregister() to register() below. Note this comes with some pitfalls.
13 | // Learn more about service workers: https://bit.ly/CRA-PWA 14 | serviceWorker.unregister(); 15 | -------------------------------------------------------------------------------- /scripts/docker_import_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eu 3 | cd "$(dirname "$(dirname "$0")")" 4 | PROJ="$(basename "$(pwd)")" 5 | if [ -z "$1" ]; then 6 | echo "Missing argument" >&2 7 | exit 1 8 | fi 9 | docker run -ti --rm --network ${PROJ}_default -v $PWD/scripts:/scripts -v "$1:/index" -e ELASTICSEARCH_HOSTS=elasticsearch:9200 -e ELASTICSEARCH_PREFIX=${ELASTICSEARCH_PREFIX} -e AMQP_HOST=rabbitmq -e AMQP_PORT=5672 -e AMQP_USER=$AMQP_USER -e AMQP_PASSWORD=$AMQP_PASSWORD -e LAZO_SERVER_HOST=lazo -e LAZO_SERVER_PORT=50051 auctus python /scripts/import_all.py /index 10 | -------------------------------------------------------------------------------- /frontend/src/components/AdvancedSearchBar/AdvancedSearchBar.css: -------------------------------------------------------------------------------- 1 | .AdvancedSearchBar { 2 | max-width: 1000px; 3 | margin: 0.375rem auto 0 auto!important; 4 | } 5 | 6 | .AdvancedSearchBar-title { 7 | vertical-align: middle; 8 | padding: .375rem 0; 9 | font-size: 1rem; 10 | line-height: 1.5; 11 | } 12 | 13 | .AdvancedSearchBar-item { 14 | margin-left: .25rem; 15 | padding-right: 0; 16 | color: #707070; 17 | font-size: .9rem; 18 | cursor: pointer; 19 | } 20 | 21 | .AdvancedSearchBar-item span { 22 | padding-left: .25em; 23 | padding-right: .25em; 24 | } -------------------------------------------------------------------------------- /scripts/docker_build_push.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | cd "$(dirname "$0")/.." 5 | 6 | VERSION=$(git describe) 7 | 8 | # Build 9 | docker build -t auctus --build-arg version=$VERSION . 10 | docker build -t auctus_frontend -f frontend/Dockerfile . 
11 | 12 | # Push 13 | docker tag auctus registry.gitlab.com/vida-nyu/auctus/auctus:$VERSION 14 | docker push registry.gitlab.com/vida-nyu/auctus/auctus:$VERSION 15 | docker tag auctus_frontend registry.gitlab.com/vida-nyu/auctus/auctus/frontend:$VERSION 16 | docker push registry.gitlab.com/vida-nyu/auctus/auctus/frontend:$VERSION 17 | -------------------------------------------------------------------------------- /docker/etc_rabbitmq/rabbitmq.conf: -------------------------------------------------------------------------------- 1 | loopback_users.guest = false 2 | listeners.tcp.default = 5672 3 | management.tcp.port = 15672 4 | 5 | management.sample_retention_policies.global.minute = 5 6 | management.sample_retention_policies.global.hour = 60 7 | management.sample_retention_policies.global.day = 1200 8 | 9 | management.sample_retention_policies.basic.minute = 5 10 | management.sample_retention_policies.basic.hour = 60 11 | management.sample_retention_policies.basic.day = 1200 12 | 13 | management.sample_retention_policies.detailed.10 = 5 14 | 15 | prometheus.return_per_object_metrics = true 16 | -------------------------------------------------------------------------------- /frontend/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "Auctus", 3 | "name": "Auctus", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "64x64 32x32 24x24 16x16", 8 | "type": "image/x-icon" 9 | }, 10 | { 11 | "src": "logo192.png", 12 | "type": "image/png", 13 | "sizes": "192x192" 14 | }, 15 | { 16 | "src": "logo512.png", 17 | "type": "image/png", 18 | "sizes": "512x512" 19 | } 20 | ], 21 | "start_url": ".", 22 | "display": "standalone", 23 | "theme_color": "#000000", 24 | "background_color": "#ffffff" 25 | } 26 | -------------------------------------------------------------------------------- /docs/redoc/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Auctus REST Documentation 5 | 6 | 7 | 8 | 9 | 12 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /frontend/src/components/visus/Card/card.css: -------------------------------------------------------------------------------- 1 | .card-title { 2 | margin-bottom: .75rem; 3 | margin-top: -34px; 4 | background: white; 5 | padding: 3px 10px 3px 10px; 6 | width: fit-content; 7 | font-size: 1rem; 8 | border-radius: 3px; 9 | margin-left: -10px; 10 | } 11 | 12 | .card-attributes { 13 | display: flex; 14 | width: 100%; 15 | } 16 | 17 | .card-attributes .card-attr-field { 18 | font-weight: bold; 19 | text-align: right; 20 | max-width: 130px; 21 | } 22 | 23 | .card-hover { 24 | background-color: #FFFFFF; 25 | } 26 | 27 | .card-hover:hover { 28 | background-color: #f8f8f8; 29 | } -------------------------------------------------------------------------------- /tests/data/basic.d3m.csv: -------------------------------------------------------------------------------- 1 | d3mIndex,name,color,number,what 2 | 0,james,green,5,false 3 | 1,john,blue,4,false 4 | 2,robert,blue,6,false 5 | 3,michael,blue,7,true 6 | 4,william,blue,7,true 7 | 5,david,green,5,false 8 | 6,richard,green,7,true 9 | 7,joseph,blue,6,true 10 | 8,thomas,blue,6,false 11 | 9,charles,blue,7,false 12 | 10,christopher,green,11,true 13 | 11,daniel,blue,5,false 14 | 12,matthew,green,7,true 15 | 13,anthony,green,7,true 16 | 14,donald,blue,6,true 17 | 15,mark,blue,4,false 18 | 16,paul,blue,4,false 19 | 
17,steven,blue,6,false 20 | 18,andrew,green,6,false 21 | 19,kenneth,green,7,true 22 | -------------------------------------------------------------------------------- /scripts/docker_import_snapshot.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eu 3 | cd "$(dirname "$(dirname "$0")")" 4 | PROJ="$(basename "$(pwd)")" 5 | docker run -ti --rm --network ${PROJ}_default -v $PWD/scripts:/scripts -e ELASTICSEARCH_HOSTS=elasticsearch:9200 -e ELASTICSEARCH_PREFIX=${ELASTICSEARCH_PREFIX} -e AMQP_HOST=rabbitmq -e AMQP_PORT=5672 -e AMQP_USER=$AMQP_USER -e AMQP_PASSWORD=$AMQP_PASSWORD -w /tmp auctus sh -c 'curl -LO https://auctus.vida-nyu.org/snapshot/index.tar.gz && if [ -e index.snapshot ]; then rm -rf index.snapshot; fi && mkdir index.snapshot && tar xfC index.tar.gz index.snapshot && python /scripts/import_all.py index.snapshot; rm -rf index.snapshot' 6 | -------------------------------------------------------------------------------- /tests/data/dates_pivoted.converted.csv: -------------------------------------------------------------------------------- 1 | color,date,value 2 | green,2012-01-01,yes 3 | green,2012-02-01,no 4 | green,2012-03-01,no 5 | green,2012-04-01,yes 6 | green,2012-05-01,no 7 | green,2012-06-01,yes 8 | green,2012-07-01,yes 9 | green,2012-08-01,yes 10 | green,2012-09-01,yes 11 | green,2012-10-01,no 12 | green,2012-11-01,no 13 | green,2012-12-01,yes 14 | red,2012-01-01,no 15 | red,2012-02-01,yes 16 | red,2012-03-01,yes 17 | red,2012-04-01,yes 18 | red,2012-05-01,no 19 | red,2012-06-01,no 20 | red,2012-07-01,no 21 | red,2012-08-01,yes 22 | red,2012-09-01,no 23 | red,2012-10-01,yes 24 | red,2012-11-01,yes 25 | red,2012-12-01,no 26 | -------------------------------------------------------------------------------- /frontend/src/components/MainMenu/MainMenu.css: -------------------------------------------------------------------------------- 1 | .main-menu { 2 | float: right; 3 | color: #707070; 4 | font-size: 0.9rem; 5 | position: absolute; 6 | z-index: 1000; 7 | right: 10px; 8 | top: 7px; 9 | /* bottom: 0; */ 10 | } 11 | 12 | .card-menu { 13 | border: 1px solid #ced4da; 14 | border-radius: 3px; 15 | background-color: #fff; 16 | padding-top: 2px; 17 | padding-bottom: 2px; 18 | } 19 | 20 | .menu-link { 21 | background-color: transparent; 22 | padding: 0.8rem; 23 | cursor: pointer; 24 | } 25 | 26 | .menu-link:hover { 27 | background-color: #f0f0f0; 28 | } 29 | 30 | .menu-link a:hover { 31 | text-decoration: none; 32 | } 33 | -------------------------------------------------------------------------------- /frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./node_modules/gts/tsconfig-google.json", 3 | "compilerOptions": { 4 | "target": "es5", 5 | "lib": [ 6 | "dom", 7 | "dom.iterable", 8 | "esnext" 9 | ], 10 | "allowJs": true, 11 | "skipLibCheck": true, 12 | "esModuleInterop": true, 13 | "allowSyntheticDefaultImports": true, 14 | "strict": true, 15 | "forceConsistentCasingInFileNames": true, 16 | "module": "esnext", 17 | "moduleResolution": "node", 18 | "resolveJsonModule": true, 19 | "isolatedModules": true, 20 | "noEmit": true, 21 | "jsx": "react" 22 | }, 23 | "include": [ 24 | "src" 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /docs/schemas.rst: -------------------------------------------------------------------------------- 1 | JSON Schemas 2 | ============ 3 | 4 | .. 
_schema-query: 5 | 6 | Query 7 | ----- 8 | 9 | JSON objects expected by `the search endpoint <../rest/#operation/search>`__. 10 | 11 | .. literalinclude:: schemas/query_input_schema.json 12 | :language: json 13 | :linenos: 14 | 15 | .. _schema-result: 16 | 17 | Result schema 18 | ------------- 19 | 20 | Description of a dataset, such as a search result. `The search endpoint <../rest/#operation/search>`__ returns an array of those. They are also what you give the :func:`datamart_materialize.download`. 21 | 22 | .. literalinclude:: schemas/query_result_schema.json 23 | :language: json 24 | :linenos: 25 | -------------------------------------------------------------------------------- /docs/python/datamart-augmentation.rst: -------------------------------------------------------------------------------- 1 | Augmentation library 2 | ==================== 3 | 4 | This library performs data augmentation between datasets from Auctus. You can use it to augment a dataset with a search result directly on your side without relying on the server. It is also used internally by Auctus to perform augmentations (the ``/augment`` endpoint downloads the dataset using this library, performs augmentation, then sends the result to you). 5 | 6 | Installing datamart-augmentation 7 | -------------------------------- 8 | 9 | You can get it directly from the Python Package Index using PIP:: 10 | 11 | pip install datamart-augmentation 12 | 13 | API 14 | --- 15 | 16 | TODO 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | /volumes 3 | /docs/_build 4 | 5 | # Python 6 | *.py[co] 7 | .ipynb_checkpoints 8 | 9 | # Packages 10 | *.egg 11 | *.egg-info 12 | dist 13 | build 14 | eggs 15 | parts 16 | bin 17 | var 18 | sdist 19 | develop-eggs 20 | .installed.cfg 21 | lib 22 | lib64 23 | 24 | # Installer logs 25 | pip-log.txt 26 | 27 | # Unit test / coverage reports 28 | .coverage 29 | .tox 30 | nosetests.xml 31 | 32 | # Eclipse PyDev 33 | .project 34 | .pydevproject 35 | 36 | # PyCharm 37 | .idea 38 | 39 | # ViM 40 | .*.swp 41 | 42 | # Emacs 43 | \#*# 44 | 45 | # OS files 46 | .DS_Store 47 | desktop.ini 48 | 49 | # Archives 50 | *.tar 51 | *.tar.gz 52 | *.tar.bz2 53 | *.zip 54 | *.whl 55 | 56 | # Vagrant 57 | .vagrant 58 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /tests/data/lazo.csv: -------------------------------------------------------------------------------- 1 | here's a header but the profiler will throw it out 2 | 3 | dessert year 4 | pie 1991 5 | cake 1991 6 | candy 1990 7 | cookie 1990 8 | doughnut 1990 9 | ice cream 1990 10 | pastry 1990 11 | tart 1990 12 | pudding 1990 13 | jello 1990 14 | apple 1990 15 | pear 1990 16 | banana 1990 17 | fruitcake 1990 18 | orange 1990 19 | petit four 1990 20 | pop tart 1990 21 | tiramisu 1990 22 | tres leches 1990 23 | calisson 1990 24 | chocolate 1990 25 | 1990 26 | liquorice 1990 27 | nougat 1990 28 | coconut 1990 29 | marzipan 1990 30 | taffy 1990 31 | lemon 1990 32 | macaron 1990 33 | gingerbread 1990 34 | peanut butter 1990 35 | eclair 1990 36 | french toast 1990 37 | profiterole 1990 38 | caramel 1991 39 | milkshake 1991 40 | -------------------------------------------------------------------------------- /apiserver/apiserver/search/base.py: -------------------------------------------------------------------------------- 1 | TOP_K_SIZE = 50 2 | 3 | 4 | class ClientError(ValueError): 5 | """Error in query sent by client. 6 | """ 7 | 8 | 9 | def get_column_identifiers(es, column_names, dataset_id=None, data_profile=None): 10 | column_indices = [-1 for _ in column_names] 11 | if not data_profile: 12 | columns = es.get('datasets', dataset_id, _source='columns.name') 13 | columns = columns['_source']['columns'] 14 | else: 15 | columns = data_profile['columns'] 16 | for i in range(len(columns)): 17 | for j in range(len(column_names)): 18 | if columns[i]['name'] == column_names[j]: 19 | column_indices[j] = i 20 | return column_indices 21 | -------------------------------------------------------------------------------- /lib_materialize/datamart_materialize/stata.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | 3 | from datamart_materialize.utils import SimpleConverter 4 | 5 | 6 | def stata_to_csv(source_filename, dest_fileobj): 7 | for i, chunk in enumerate( 8 | pandas.read_stata(source_filename, iterator=True, chunksize=1) 9 | ): 10 | chunk.to_csv( 11 | dest_fileobj, 12 | header=(i == 0), 13 | float_format='%g', 14 | date_format='%Y-%m-%dT%H:%M:%S', 15 | index=False, 16 | line_terminator='\r\n', 17 | ) 18 | 19 | 20 | class StataConverter(SimpleConverter): 21 | """Adapter converting a Stata file to CSV. 
22 | """ 23 | transform = staticmethod(stata_to_csv) 24 | -------------------------------------------------------------------------------- /scripts/delete_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import lazo_index_service 4 | import logging 5 | import os 6 | import sys 7 | 8 | from datamart_core.common import PrefixedElasticsearch, \ 9 | delete_dataset_from_index 10 | 11 | 12 | SIZE = 10000 13 | 14 | 15 | def delete(datasets): 16 | es = PrefixedElasticsearch() 17 | lazo_client = lazo_index_service.LazoIndexClient( 18 | host=os.environ['LAZO_SERVER_HOST'], 19 | port=int(os.environ['LAZO_SERVER_PORT']) 20 | ) 21 | for dataset in datasets: 22 | delete_dataset_from_index(es, dataset, lazo_client) 23 | 24 | 25 | if __name__ == '__main__': 26 | logging.basicConfig(level=logging.INFO) 27 | 28 | delete(sys.argv[1:]) 29 | -------------------------------------------------------------------------------- /docker/nominatim.dockerfile: -------------------------------------------------------------------------------- 1 | FROM mediagis/nominatim:3.3 2 | 3 | # Increase memory limit 4 | RUN find /etc -name php.ini -exec sed -i '/^memory_limit *= *[0-9]/ c memory_limit = 1024M' {} ';' 5 | RUN find /app -name \*.php\* -exec sed -i "s/ini_set('memory_limit', *'[0-9]\+M');/ini_set('memory_limit', '1024M');/" {} ';' 6 | 7 | # Enable batch mode 8 | RUN bash -c "cd /app/src/build && echo $'--- settings/settings.php\\n\ 9 | +++ settings/settings.php\\n\ 10 | @@ -106 +106 @@\\n\ 11 | -@define(\\'CONST_Search_BatchMode\\', false);\\n\ 12 | +@define(\\'CONST_Search_BatchMode\\', true);\\n\ 13 | ' | patch -p0" 14 | 15 | # Print apache log as well as postgres log 16 | RUN sed -i '/tail -f/ a tail -f /var/log/apache2/error.log &' start.sh 17 | -------------------------------------------------------------------------------- /scripts/report-uploads.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -eu 4 | 5 | # Query elasticsearch 6 | RECORD="$(curl -s -H content-type:application/json -d '{"query":{"bool":{"should":[{"term":{"materialize.identifier":"datamart.url"}},{"term":{"materialize.identifier":"datamart.upload"}}]}}, "_source":["date", "name"]}' http://localhost:8020/_search?size=1000 \ 7 | | jq -r '.hits.hits | sort_by(._source.date)[] | ._source.date + ": " + ._id + " (" + ._source.name + ")"' \ 8 | | tail -n 1)" 9 | LASTRECORD="$(cat $HOME/report-uploads.last)" 10 | if [ "$RECORD" != "$LASTRECORD" ]; then 11 | echo "Check https://coordinator.auctus.vida-nyu.org/" \ 12 | | mail -s "New uploaded datasets" root 13 | echo "$RECORD" >$HOME/report-uploads.last 14 | fi 15 | -------------------------------------------------------------------------------- /coordinator/coordinator/templates/errors.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% set active_page = "index" %} 3 | 4 | {% block contents %} 5 |

<h1>Datasets with error {{ error_type }}</h1> 6 | <ul> 7 | {% if datasets %} 8 | {% for dataset in datasets %} 9 | <li> 10 | {{ dataset.metadata.name }} ({{ dataset.id }}) 11 | <div> 12 | {{ dataset.error_details.exception }} 13 | <pre>{{ dataset.error_details.traceback }}</pre> 14 | </div> 15 | </li> 16 | {% endfor %} 17 | {% else %} 18 | <li>No dataset with that error</li> 19 | {% endif %} 20 | </ul>
21 | {% endblock %} 22 | -------------------------------------------------------------------------------- /tests/data/annotated.csv: -------------------------------------------------------------------------------- 1 | id,lt_coord,lg_coord,height,stmo 2 | place00,40.734746,-74.000077,85.772569,10 3 | place01,40.728026,-73.998869,58.730197,10 4 | place02,40.728278,-74.005837,51.929949,11 5 | place03,40.726640,-73.993186,12.730146,9 6 | place04,40.732466,-74.004689,44.452236,5 7 | place05,40.722948,-74.001501,42.904820,12 8 | place06,40.735108,-73.996996,48.345170,1 9 | place07,40.727577,-74.002853,37.459986,2 10 | place08,40.730824,-74.002225,49.123637,4 11 | place09,40.729115,-74.001726,40.455639,6 12 | place10,40.734259,-73.996833,23.722705,6 13 | place11,40.723674,-73.991001,67.692448,7 14 | place12,40.728896,-73.998542,67.626361,8 15 | place13,40.728711,-74.002426,84.191461,12 16 | place14,40.733272,-73.996875,51.000673,12 17 | place15,40.726559,-74.000678,41.906452,11 18 | -------------------------------------------------------------------------------- /frontend/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:14-buster AS build 2 | 3 | RUN mkdir /src 4 | RUN chown -R node /src 5 | USER node 6 | WORKDIR /src 7 | 8 | COPY frontend/package.json frontend/package-lock.json /src/ 9 | RUN lock_hash="$(shasum -a 256 package-lock.json)" && \ 10 | npm install && \ 11 | echo "$lock_hash" | shasum -c 12 | COPY frontend /src/ 13 | RUN npm run build 14 | 15 | 16 | FROM nginx:1.21 17 | 18 | COPY --from=build --chown=0:0 /src/build /var/www/html 19 | COPY frontend/nginx.conf /etc/nginx/conf.d/default.conf 20 | 21 | # nginx default CMD is ["nginx", "-g", "daemon off;"] 22 | CMD ["sh", "-c", "sed -i 's|]\\+>||' /var/www/html/index.html && exec nginx -g \"daemon off; worker_shutdown_timeout 2s;\""] 23 | -------------------------------------------------------------------------------- /frontend/src/components/SearchBar/SearchBar.css: -------------------------------------------------------------------------------- 1 | .SearchBar { 2 | max-width: 1000px; 3 | margin: 0 auto; 4 | } 5 | 6 | .SearchBar-input { 7 | height: 46px; 8 | border-right-width: 0; 9 | } 10 | 11 | .SearchBar-icon { 12 | width: 46px; 13 | margin: 0 auto; 14 | background-color: transparent; 15 | border-left-width: 0; 16 | } 17 | 18 | .SearchBar-icon-active { 19 | background-color: #63508b; 20 | } 21 | 22 | .SearchBar-icon-active .feather { 23 | color: #ffffff!important; 24 | } 25 | 26 | .SearchBar-icon .feather { 27 | margin: 0 auto; 28 | color: #707070; 29 | width: 24px; 30 | height: 24px; 31 | } 32 | 33 | .SearchBar input:focus { 34 | box-shadow: none; 35 | border-color: #ced4da!important; 36 | } 37 | 38 | .SearchBar .input-group-append { 39 | cursor: pointer; 40 | } 41 | -------------------------------------------------------------------------------- /tests/ci.env: -------------------------------------------------------------------------------- 1 | AUCTUS_DEBUG=yes 2 | ELASTICSEARCH_HOSTS=elasticsearch:9200 3 | ELASTICSEARCH_PREFIX=auctus_ 4 | LAZO_SERVER_HOST=lazo 5 | LAZO_SERVER_PORT=50051 6 | AMQP_HOST=rabbitmq 7 | AMQP_PORT=5672 8 | AMQP_USER=auctus 9 | AMQP_PASSWORD=auctus 10 | ADMIN_PASSWORD=auctus 11 | S3_KEY=devkey 12 | S3_SECRET=devpassword 13 | S3_URL=http://minio:9000 14 | S3_CLIENT_URL=http://minio:9000 15 | S3_BUCKET_PREFIX=dev- 16 | AUCTUS_REQUEST_WHITELIST=test-discoverer 17 | AUCTUS_REQUEST_BLACKLIST= 18 | FRONTEND_URL=http://frontend 19 | 
API_URL=http://apilb:8002/api/v1 20 | MAX_CACHE_BYTES=100000000000 21 | NOMINATIM_URL= 22 | NOAA_TOKEN= 23 | CUSTOM_FIELDS={"specialId": {"label": "Special ID", "type": "integer"}, "dept": {"label": "Department", "type": "keyword", "required": true}} 24 | SENTRY_DSN= 25 | SENTRY_ENVIRONMENT= 26 | -------------------------------------------------------------------------------- /scripts/list_big_datasets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """This script lists datasets with a big size. 4 | """ 5 | 6 | from datamart_core.common import PrefixedElasticsearch 7 | 8 | 9 | SIZE = 10000 10 | 11 | 12 | def search(): 13 | es = PrefixedElasticsearch() 14 | hits = es.scan( 15 | index='datasets', 16 | query={ 17 | 'query': { 18 | 'range': { 19 | "size": { 20 | "gt": 10000000000, # 10 GB 21 | }, 22 | }, 23 | }, 24 | }, 25 | _source='size', 26 | size=SIZE, 27 | ) 28 | for h in hits: 29 | print("%s %.1f GB" % (h['_id'], h['_source']['size'] / 1000000000.0)) 30 | 31 | 32 | if __name__ == '__main__': 33 | search() 34 | -------------------------------------------------------------------------------- /frontend/src/components/Logo/Logo.css: -------------------------------------------------------------------------------- 1 | .logo-vertical { 2 | padding-top: 30px; 3 | padding-bottom: 30px; 4 | } 5 | 6 | .logo-vertical img { 7 | width: 190px; 8 | margin: 0 auto; 9 | } 10 | 11 | .logo-vertical span { 12 | font-size: 60px; 13 | line-height: 1; 14 | margin-top: -10px; 15 | } 16 | 17 | .logo-horizontal img { 18 | width: 56px; 19 | margin: 0 auto; 20 | } 21 | 22 | .logo-horizontal span { 23 | font-size: 25px; 24 | line-height: 1; 25 | margin-top: -10px; 26 | } 27 | 28 | .logo-centered-horizontal { 29 | text-align: center; 30 | padding-top: 30px; 31 | padding-bottom: 30px; 32 | } 33 | 34 | /* don't decorate the logo if inside a link */ 35 | a .logo-horizontal, a .logo-vertical, a:hover .logo-horizontal, a:hover .logo-vertical { 36 | text-decoration: none; 37 | color: #212529; 38 | } 39 | -------------------------------------------------------------------------------- /lib_materialize/datamart_materialize/tsv.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | from datamart_materialize.utils import SimpleConverter 4 | 5 | 6 | def tsv_to_csv(source_filename, dest_fileobj, separator='\t'): 7 | with open(source_filename, 'r') as src_fp: 8 | src = csv.reader(src_fp, delimiter=separator) 9 | dst = csv.writer(dest_fileobj) 10 | for line in src: 11 | dst.writerow(line) 12 | 13 | 14 | class TsvConverter(SimpleConverter): 15 | """Adapter converting a TSV or other separated file to CSV. 
16 | """ 17 | def __init__(self, writer, separator='\t'): 18 | self.separator = separator 19 | super(TsvConverter, self).__init__(writer) 20 | 21 | def transform(self, source_filename, dest_fileobj): 22 | tsv_to_csv(source_filename, dest_fileobj, separator=self.separator) 23 | -------------------------------------------------------------------------------- /lib_materialize/datamart_materialize/parquet.py: -------------------------------------------------------------------------------- 1 | import fastparquet 2 | 3 | from datamart_materialize.utils import SimpleConverter 4 | 5 | 6 | def parquet_to_csv(source_filename, dest_fileobj): 7 | src = fastparquet.ParquetFile(source_filename) 8 | for i, chunk in enumerate(src.iter_row_groups()): 9 | chunk.to_csv( 10 | dest_fileobj, 11 | header=(i == 0), 12 | float_format='%g', 13 | date_format='%Y-%m-%dT%H:%M:%S', 14 | index=False, 15 | line_terminator='\r\n', 16 | ) 17 | 18 | 19 | class ParquetConverter(SimpleConverter): 20 | """Adapter converting a Parquet file to CSV. 21 | """ 22 | def transform(self, source_filename, dest_fileobj): 23 | parquet_to_csv( 24 | source_filename, 25 | dest_fileobj, 26 | ) 27 | -------------------------------------------------------------------------------- /docker/haproxy.conf: -------------------------------------------------------------------------------- 1 | global 2 | master-worker no-exit-on-failure 3 | 4 | defaults 5 | mode http 6 | balance roundrobin 7 | option httplog 8 | timeout connect 5000 9 | timeout client 5000 10 | timeout server 5000 11 | timeout http-request 900s 12 | timeout server 900s 13 | 14 | frontend stats 15 | bind :8004 16 | http-request use-service prometheus-exporter if { path /metrics } 17 | stats enable 18 | stats uri / 19 | 20 | resolvers systemdns 21 | parse-resolv-conf 22 | hold timeout 120s 23 | hold refused 120s 24 | 25 | frontend api-in 26 | bind *:8002 27 | default_backend api-servers 28 | 29 | backend api-servers 30 | option httpchk GET /health 31 | server-template apiserver 20 apiserver:8002 maxconn 1 check inter 1000 rise 2 fall 1 resolvers systemdns resolve-opts prevent-dup-ip 32 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to Auctus's documentation! 2 | ================================== 3 | 4 | Auctus is a dataset search engine and data augmentation platform developed at New York University. It can be used to index the content of datasets from a variety of sources, which can later be queried to find data that can be joined or appended to a user's data. 5 | 6 | The system can be found at this address: https://auctus.vida-nyu.org/ 7 | 8 | You can find the source code on GitLab: https://gitlab.com/ViDA-NYU/auctus/auctus 9 | 10 | ..
toctree:: 11 | :maxdepth: 2 12 | :caption: Contents: 13 | 14 | webui 15 | Using the REST API 16 | schemas 17 | python/index 18 | internals 19 | 20 | Indices and tables 21 | ================== 22 | 23 | * :ref:`genindex` 24 | * :ref:`modindex` 25 | * :ref:`search` 26 | -------------------------------------------------------------------------------- /frontend/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": ["./node_modules/gts", "plugin:react/recommended"], 3 | "env": { 4 | "browser": true, 5 | "node": false, 6 | "jest": true 7 | }, 8 | "rules": { 9 | "node/no-unsupported-features/node-builtins": ["off"], 10 | "node/no-extraneous-import": ["off"], 11 | "node/no-unpublished-import": ["off"], 12 | "react/prop-types": ["off"], 13 | "prefer-const": [ 14 | "error", 15 | { 16 | "destructuring": "all", 17 | "ignoreReadBeforeAssign": false 18 | } 19 | ], 20 | "@typescript-eslint/ban-types": [ 21 | "error", 22 | { 23 | "extendDefaults": true, 24 | "types": { 25 | "{}": false 26 | } 27 | } 28 | ] 29 | }, 30 | "settings": { 31 | "react": { 32 | "version": "detect" 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /docs/python/datamart-rest.rst: -------------------------------------------------------------------------------- 1 | API client 2 | ========== 3 | 4 | A client library for `the REST API <../rest>`__ is available for convenience. It supports searching, downloading, and augmenting datasets. 5 | 6 | It can perform some operations both on the client-side (for speed, the server has limited capacity; also saves time by not uploading the data) and on the server-side in "proxy mode" (working around the need to install and configure some dependencies on the client, and taking advantage of cached results on the server). 7 | 8 | Installing datamart-rest 9 | ------------------------ 10 | 11 | You can get it directly from the Python Package Index using PIP:: 12 | 13 | pip install datamart-rest 14 | 15 | API 16 | --- 17 | 18 | The REST client is currently maintained as part of the D3M project, with `documentation available here `__. 
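For example, a search/download round trip with the client could look like this (a minimal sketch: the ``RESTDatamart`` name and the cursor/result methods below follow the D3M Datamart API definition and are assumptions to be checked against that documentation, not code from this repository)::

    import datamart_rest

    # URL of the Auctus API (example value)
    client = datamart_rest.RESTDatamart('https://auctus.vida-nyu.org/api/v1')

    # Search for datasets matching some keywords; results come back in pages
    cursor = client.search(query={'keywords': ['taxi', 'demand']})
    results = cursor.get_next_page()

    # Materialize the first result as a D3M dataset
    results[0].download(supplied_data=None)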
19 | -------------------------------------------------------------------------------- /tests/__main__.py: -------------------------------------------------------------------------------- 1 | import locale 2 | import os 3 | import sys 4 | import unittest 5 | 6 | 7 | top_level = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) 8 | start_dir = os.path.join(top_level, 'tests') 9 | if top_level not in sys.path: 10 | sys.path.insert(0, top_level) 11 | 12 | 13 | sys.path.append(start_dir) 14 | 15 | 16 | class Program(unittest.TestProgram): 17 | def createTests(self): 18 | if self.testNames is None: 19 | self.test = self.testLoader.discover( 20 | start_dir=start_dir, 21 | top_level_dir=top_level, 22 | pattern='test_*.py') 23 | else: 24 | self.test = self.testLoader.loadTestsFromNames(self.testNames) 25 | 26 | 27 | if __name__ == '__main__': 28 | # Locale 29 | locale.setlocale(locale.LC_ALL, '') 30 | 31 | prog = Program(argv=['tests'] + sys.argv[1:]) 32 | -------------------------------------------------------------------------------- /frontend/src/components/Badges/Badges.css: -------------------------------------------------------------------------------- 1 | .badge-group > * { 2 | display: inline-flex; 3 | margin-right: 0.25rem; 4 | } 5 | 6 | .badge-group > *:last-child { 7 | margin-right: 0px; 8 | } 9 | 10 | .badge-column, .badge-numerical, .badge-textual { 11 | background-color: #f0f0f0; 12 | font-size: .7rem; 13 | } 14 | 15 | .badge-numerical svg { 16 | color: #1ab082; 17 | } 18 | 19 | .badge-textual svg { 20 | color: #4d96b2; 21 | } 22 | 23 | .badge-categorical { 24 | color: #fff; 25 | background-color: #4d96b2; 26 | } 27 | 28 | .badge-number { 29 | color: #fff; 30 | background-color: #1ab082; 31 | } 32 | 33 | .badge-corner-button{ 34 | width: 12px; 35 | height: 12px; 36 | margin-top: -18px; 37 | margin-left: 1px; 38 | padding: 0px; 39 | margin-right: -6px; 40 | } 41 | 42 | .badge-corner-button svg{ 43 | color:#fdfdfe; 44 | fill: #757575; 45 | stroke-width: 2; 46 | } 47 | 48 | -------------------------------------------------------------------------------- /scripts/list_sources.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """This script gives a summary of the dataset sources. 
4 | """ 5 | 6 | from datamart_core.common import PrefixedElasticsearch 7 | 8 | 9 | SIZE = 10000 10 | 11 | 12 | def count(): 13 | es = PrefixedElasticsearch() 14 | sources = {} 15 | hits = es.scan( 16 | index='datasets', 17 | query={ 18 | 'query': { 19 | 'match_all': {}, 20 | }, 21 | }, 22 | _source='source', 23 | size=SIZE, 24 | ) 25 | for h in hits: 26 | source = h['_source']['source'] 27 | 28 | try: 29 | sources[source] += 1 30 | except KeyError: 31 | sources[source] = 1 32 | 33 | for identifier, count in sorted(sources.items(), key=lambda p: -p[1]): 34 | print('{: 6d} {}'.format(count, identifier)) 35 | 36 | 37 | if __name__ == '__main__': 38 | count() 39 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | **/.env 2 | volumes 3 | docs 4 | frontend/node_modules 5 | frontend/build 6 | lib_geo/data 7 | 8 | # Git 9 | **/.git 10 | **/.gitignore 11 | 12 | # Python 13 | **/__pycache__ 14 | **/*.pyc 15 | **/.ipynb_checkpoints 16 | 17 | # Packages 18 | **/*.egg 19 | **/*.egg-info 20 | **/dist 21 | **/build 22 | **/eggs 23 | **/parts 24 | **/bin 25 | **/var 26 | **/sdist 27 | **/develop-eggs 28 | **/.installed.cfg 29 | **/lib 30 | **/lib64 31 | 32 | # Installer logs 33 | **/pip-log.txt 34 | 35 | # Unit test / coverage reports 36 | **/.coverage 37 | **/.tox 38 | **/nosetests.xml 39 | 40 | # Eclipse PyDev 41 | **/.project 42 | **/.pydevproject 43 | 44 | # PyCharm 45 | **/.idea 46 | 47 | # ViM 48 | **/.*.swp 49 | 50 | # Emacs 51 | **/#*# 52 | 53 | # OS files 54 | **/.DS_Store 55 | **/desktop.ini 56 | 57 | # Archives 58 | **/*.tar 59 | **/*.tar.gz 60 | **/*.tar.bz2 61 | **/*.zip 62 | **/*.whl 63 | 64 | # Vagrant 65 | **/.vagrant 66 | -------------------------------------------------------------------------------- /frontend/src/App.test.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import * as ReactDOM from 'react-dom'; 3 | import * as api from './api/rest'; 4 | import {render} from '@testing-library/react'; 5 | import {App} from './App'; 6 | import 'jest-canvas-mock'; 7 | 8 | beforeEach(() => { 9 | jest.spyOn(api, 'status').mockImplementation(() => 10 | Promise.resolve({ 11 | recent_discoveries: [], 12 | sources_counts: { 13 | remi: 23, 14 | fernando: 37, 15 | }, 16 | }) 17 | ); 18 | }); 19 | 20 | afterEach(() => jest.restoreAllMocks()); 21 | 22 | test('renders main app', () => { 23 | const {getByText} = render(); 24 | const linkElement = getByText(/Auctus/i); 25 | expect(linkElement).toBeInTheDocument(); 26 | }); 27 | 28 | test('renders without crashing', () => { 29 | const div = document.createElement('div'); 30 | ReactDOM.render(, div); 31 | ReactDOM.unmountComponentAtNode(div); 32 | }); 33 | -------------------------------------------------------------------------------- /coordinator/coordinator/templates/login.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% set active_page = "login" %} 3 | 4 | {% block contents %} 5 |
<form method="post"> 6 | {{ xsrf_form_html() }} 7 | {% if error %} 8 | <div class="alert alert-danger" role="alert"> 9 | {{ error }} 10 | </div> 11 | {% endif %} 12 | 13 | <div class="form-group"> 14 | <input type="password" class="form-control" name="password" placeholder="Password" /> 15 | </div> 16 | 17 | <div class="form-group"> 18 | <input type="submit" class="btn btn-primary" value="Log in" /> 19 | </div> 20 | 21 | </form> 22 | 23 | 24 |
25 | {% endblock %} 26 | -------------------------------------------------------------------------------- /frontend/src/components/Upload/Upload.css: -------------------------------------------------------------------------------- 1 | .dropdown { 2 | position: relative; 3 | display: inline-block; 4 | } 5 | 6 | .dropdown-content { 7 | display: none; 8 | position: absolute; 9 | background-color: #f1f1f1; 10 | min-width: 110px; 11 | box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2); 12 | z-index: 4; 13 | max-height: 16vh; 14 | overflow-y: auto; 15 | margin-left: -4px; 16 | } 17 | 18 | .dropdown-content div { 19 | color: black; 20 | padding: 4px 4px; 21 | text-decoration: none; 22 | display: block; 23 | } 24 | 25 | .dropdown:hover .dropdown-content { 26 | display: inline-block; 27 | } 28 | 29 | .badge-button{ 30 | width: 12px; 31 | height: 12px; 32 | margin-top: -18px; 33 | margin-left: 1px; 34 | padding: 0px; 35 | } 36 | 37 | .dropdown-content div:hover {background-color: #ddd;} 38 | 39 | .dropdown:hover .dropdown-content {display: block;} 40 | 41 | .dropdown:hover .dropbtn {background-color: #3e8e41;} 42 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /frontend/src/components/FilterContainer/FilterContainer.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import * as Icon from 'react-feather'; 3 | 4 | class FilterContainer extends React.PureComponent<{ 5 | title: string; 6 | onClose: () => void; 7 | }> { 8 | render() { 9 | return ( 10 |
<div className="card d-inline-block"> 11 | <div className="card-header"> 12 | <div className="d-inline">{this.props.title}</div> 13 | <span 14 | onClick={() => this.props.onClose()} 15 | className="d-inline text-muted ml-1" 16 | style={{cursor: 'pointer'}} 17 | title="Remove this filter" 18 | > 19 | <Icon.XCircle 20 | size={13} 21 | className="feather" 22 | /> 23 | </span> 24 | </div> 25 | <div className="card-body">{this.props.children}</div> 26 | </div>
27 | ); 28 | } 29 | } 30 | 31 | export {FilterContainer}; 32 | -------------------------------------------------------------------------------- /lib_materialize/README.rst: -------------------------------------------------------------------------------- 1 | Datamart materialization library 2 | ================================ 3 | 4 | This library can materialize datasets from Auctus, NYU's dataset search engine. You can use it to materialize search results directly on your side without relying on the server. It is also used internally by the service to materialize datasets (the ``/download`` endpoint downloads the dataset using this library then sends it to you). 5 | 6 | See also: 7 | 8 | * `The datamart-rest library for search/augmentation `__ 9 | * `The datamart-profiler library, used to profile datasets for search `__ 10 | * `The datamart-augmentation library, used to performs data augmentation with a dataset from Auctus `__ 11 | * `Auctus, NYU's dataset search engine `__ 12 | * `Our project on GitLab `__ 13 | -------------------------------------------------------------------------------- /lib_augmentation/README.rst: -------------------------------------------------------------------------------- 1 | Datamart augmentation library 2 | ============================= 3 | 4 | This library performs data augmentation between datasets from Auctus, NYU's dataset search engine. You can use it to augment a dataset with a search result directly on your side without relying on the server. It is also used internally by the service to perform augmentations (the ``/augment`` endpoint downloads the dataset using this library, performs augmentation, then sends the result to you). 5 | 6 | See also: 7 | 8 | * `The datamart-rest library for search/augmentation `__ 9 | * `The datamart-profiler library, used to profile datasets for search `__ 10 | * `The datamart-materialize library, used to materialize dataset from search results `__ 11 | * `Auctus, NYU's dataset search engine `__ 12 | * `Our project on GitLab `__ 13 | -------------------------------------------------------------------------------- /frontend/src/index.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Icon sizes 3 | */ 4 | 5 | .feather { 6 | width: 15px; 7 | height: 17px; 8 | vertical-align: text-bottom; 9 | } 10 | 11 | .feather-xs { 12 | width: 13px; 13 | height: 13px; 14 | vertical-align: text-bottom; 15 | } 16 | 17 | .feather-xs-w { 18 | width: 17px; 19 | height: 13px; 20 | vertical-align: text-bottom; 21 | } 22 | 23 | .feather-lg { 24 | width: 20px; 25 | height: 20px; 26 | vertical-align: text-bottom; 27 | } 28 | 29 | .btn .feather { 30 | width: 18px; 31 | height: 20px; 32 | } 33 | 34 | .btn-sm .feather { 35 | width: 16px; 36 | height: 21px; 37 | } 38 | 39 | /* to be applied to buttons that should look like a simple link */ 40 | .btn-link { 41 | border: none; 42 | background: none; 43 | text-decoration: underline; 44 | cursor: pointer; 45 | padding: 0px; 46 | } 47 | 48 | html, body, body #root, .container-vh-full { 49 | overflow: hidden; 50 | height: 100%; 51 | } 52 | 53 | .container-vh-scroll { 54 | overflow: auto; 55 | height: 100%; 56 | } 57 | -------------------------------------------------------------------------------- /lib_profiler/README.rst: -------------------------------------------------------------------------------- 1 | Datamart profiling library 2 | ========================== 3 | 4 | This library can profile datasets for use with Auctus, NYU's dataset search engine. 
You can use it to profile datasets on your side and send that to the server for search, instead of uploading the whole dataset. It is also used internally by the service to process search-by-example queries (when sending a file to the ``/search`` endpoint) and to add datasets to the index (to be queried against later). 5 | 6 | See also: 7 | 8 | * `The datamart-rest library for search/augmentation `__ 9 | * `The datamart-materialize library, used to materialize dataset from search results `__ 10 | * `The datamart-augmentation library, used to performs data augmentation with a dataset from Auctus `__ 11 | * `Auctus, NYU's dataset search engine `__ 12 | * `Our project on GitLab `__ 13 | -------------------------------------------------------------------------------- /env.default: -------------------------------------------------------------------------------- 1 | # Fill that in and rename to .env 2 | 3 | AUCTUS_DEBUG=no 4 | ELASTICSEARCH_HOSTS=127.0.0.1:8020 5 | ELASTICSEARCH_PREFIX=auctus_ 6 | LAZO_SERVER_HOST=127.0.0.1 7 | LAZO_SERVER_PORT=8030 8 | AMQP_HOST=127.0.0.1 9 | AMQP_PORT=8011 10 | AMQP_USER=auctus 11 | AMQP_PASSWORD=auctus 12 | ADMIN_PASSWORD=auctus 13 | S3_KEY=devkey 14 | S3_SECRET=devpassword 15 | S3_URL=http://minio:9000 16 | S3_CLIENT_URL=http://127.0.0.1:8050 17 | S3_BUCKET_PREFIX=dev- 18 | GCS_PROJECT= 19 | GCS_CREDS= 20 | GCS_BUCKET_PREFIX=dev- 21 | AUCTUS_REQUEST_WHITELIST=test-discoverer 22 | AUCTUS_REQUEST_BLACKLIST= 23 | FRONTEND_URL=http://127.0.0.1:8001 24 | API_URL=http://127.0.0.1:8002/api/v1 25 | MAX_CACHE_BYTES=100000000000 26 | # Set to an empty string to disable address resolution 27 | NOMINATIM_URL=http://nominatim 28 | NOAA_TOKEN= 29 | ISI_DATAMART_URL=https://datamart:datamart-api-789@dsbox02.isi.edu/datamart-api 30 | CUSTOM_FIELDS={"specialId": {"label": "Special ID", "type": "integer"}, "dept": {"label": "Department", "type": "keyword", "required": true}} 31 | SENTRY_DSN= 32 | SENTRY_ENVIRONMENT=test 33 | -------------------------------------------------------------------------------- /frontend/src/components/SearchResults/SimpleBar.tsx: -------------------------------------------------------------------------------- 1 | import * as React from 'react'; 2 | import {SearchFacet} from '../../api/types'; 3 | 4 | interface SimpleBarProps { 5 | facetBuckets: SearchFacet; 6 | keyname: string; 7 | totalResults: number; 8 | } 9 | class SimpleBar extends React.PureComponent { 10 | render() { 11 | const {facetBuckets, keyname, totalResults} = this.props; 12 | const rectangleWidth = 200; 13 | return ( 14 | 15 | 29 | 30 | {facetBuckets.buckets[keyname]} 31 | 32 | 33 | ); 34 | } 35 | } 36 | 37 | export {SimpleBar}; 38 | -------------------------------------------------------------------------------- /tests/data/temporal.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | import os 3 | import random 4 | 5 | 6 | def main(): 7 | data_dir = os.path.dirname(__file__) 8 | 9 | with open(os.path.join(data_dir, 'daily.csv'), 'w') as f_daily: 10 | print('aug_date,rain', file=f_daily) 11 | date = datetime(2019, 4, 23) 12 | rand = random.Random(1) 13 | for _ in range(30): 14 | time = date.date().strftime('%Y%m%d') 15 | boolean = ['no', 'yes'][rand.randint(0, 1)] 16 | print('%s,%s' % (time, boolean), file=f_daily) 17 | date += timedelta(days=1) 18 | 19 | with open(os.path.join(data_dir, 'hourly.csv'), 'w') as f_hourly: 20 | print('aug_date,rain', file=f_hourly) 21 | date = datetime(2019, 6, 12) 22 | 
rand = random.Random(2) 23 | for _ in range(52): 24 | time = date.isoformat() 25 | boolean = ['no', 'yes'][rand.randint(0, 1)] 26 | print('%s,%s' % (time, boolean), file=f_hourly) 27 | date += timedelta(hours=1) 28 | 29 | 30 | if __name__ == '__main__': 31 | main() 32 | -------------------------------------------------------------------------------- /lib_fslock/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 Remi Rampin 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /frontend/src/components/ui/Button/Button.tsx: -------------------------------------------------------------------------------- 1 | import {Tooltip} from '@material-ui/core'; 2 | import React from 'react'; 3 | import {Spinner} from '../../visus/Loading/Spinner'; 4 | import './Button.css'; 5 | 6 | const SubmitButton = (props: {label: string; loading: boolean}) => ( 7 | 15 | ); 16 | 17 | function ButtonGroup(props: React.PropsWithChildren<{}>) { 18 | return
<div className="btn-group" role="group">{props.children}</div>
; 19 | } 20 | 21 | function LinkButton( 22 | props: React.PropsWithChildren<{href: string; message?: string}> 23 | ) { 24 | return ( 25 | 30 | 31 | {props.children} 32 | 33 | 34 | ); 35 | } 36 | 37 | export {SubmitButton, ButtonGroup, LinkButton}; 38 | -------------------------------------------------------------------------------- /frontend/src/components/GeoSpatialCoverageMap/GeoSpatialCoverageMap.css: -------------------------------------------------------------------------------- 1 | .map { 2 | height: 400px; 3 | width: 100%; 4 | } 5 | 6 | .ol-popup { 7 | position: absolute; 8 | background-color: white; 9 | -webkit-filter: drop-shadow(0 1px 4px rgba(0,0,0,0.2)); 10 | filter: drop-shadow(0 1px 4px rgba(0,0,0,0.2)); 11 | padding: 15px; 12 | border-radius: 10px; 13 | border: 1px solid #cccccc; 14 | bottom: 12px; 15 | left: -50px; 16 | min-width: max-content; 17 | width: max-content; 18 | } 19 | 20 | .ol-popup:after, .ol-popup:before { 21 | top: 100%; 22 | border: solid transparent; 23 | content: " "; 24 | height: 0; 25 | width: 0; 26 | position: absolute; 27 | pointer-events: none; 28 | } 29 | 30 | .ol-popup:after { 31 | border-top-color: white; 32 | border-width: 10px; 33 | left: 48px; 34 | margin-left: -10px; 35 | } 36 | 37 | .ol-popup:before { 38 | border-top-color: #cccccc; 39 | border-width: 11px; 40 | left: 48px; 41 | margin-left: -11px; 42 | } 43 | 44 | .legend{ 45 | position:relative; 46 | left: 10px; 47 | top:-84px; 48 | z-index:10000; 49 | height: 54px; 50 | background-color:#fdfcfb; 51 | border-radius: 6px; 52 | } 53 | -------------------------------------------------------------------------------- /lib_core/datamart_core/prom.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | from prometheus_async.aio import time as prom_async_time 3 | 4 | 5 | class PromMeasureRequest(object): 6 | def __init__(self, count, time): 7 | self.count = count 8 | self.time = time 9 | 10 | def _wrap(self, *labels, timer): 11 | if labels: 12 | counter = self.count.labels(*labels) 13 | else: 14 | counter = self.count 15 | if labels: 16 | timer = timer(self.time.labels(*labels)) 17 | else: 18 | timer = timer(self.time) 19 | 20 | # Initialize count 21 | counter.inc(0) 22 | 23 | def decorator(func): 24 | @contextlib.wraps(func) 25 | def wrapper(*args, **kwargs): 26 | # Count requests 27 | counter.inc() 28 | return func(*args, **kwargs) 29 | 30 | return timer(wrapper) 31 | 32 | return decorator 33 | 34 | def sync(self, *labels): 35 | return self._wrap(*labels, timer=lambda metric: metric.time()) 36 | 37 | def async_(self, *labels): 38 | return self._wrap(*labels, timer=lambda metric: prom_async_time(metric)) 39 | -------------------------------------------------------------------------------- /lib_profiler/datamart_profiler/warning_tools.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import warnings 3 | 4 | 5 | @contextlib.contextmanager 6 | def ignore_warnings(*categories): 7 | """Context manager to ignore specific warning categories. 
8 | """ 9 | orig_showarning = warnings.showwarning 10 | 11 | def record(message, category, filename, lineno, file=None, line=None): 12 | if not any(issubclass(category, c) for c in categories): 13 | orig_showarning(message, category, filename, lineno, file, line) 14 | 15 | try: 16 | warnings.showwarning = record 17 | yield 18 | finally: 19 | warnings.showwarning = orig_showarning 20 | 21 | 22 | @contextlib.contextmanager 23 | def raise_warnings(*categories): 24 | orig_showarning = warnings.showwarning 25 | 26 | def record(message, category, filename, lineno, file=None, line=None): 27 | if any(issubclass(category, c) for c in categories): 28 | raise category(message) 29 | orig_showarning(message, category, filename, lineno, file, line) 30 | 31 | try: 32 | warnings.showwarning = record 33 | yield 34 | finally: 35 | warnings.showwarning = orig_showarning 36 | -------------------------------------------------------------------------------- /lib_materialize/datamart_materialize/common.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | from datamart_materialize.utils import SimpleConverter 4 | 5 | 6 | class UnsupportedConversion(ValueError): 7 | """This conversion cannot work.""" 8 | 9 | 10 | def skip_rows(source_filename, dest_fileobj, nb_rows): 11 | with open(source_filename, 'r') as src_fp: 12 | src = iter(csv.reader(src_fp)) 13 | dst = csv.writer(dest_fileobj) 14 | 15 | # Skip rows 16 | for i in range(nb_rows): 17 | try: 18 | next(src) 19 | except StopIteration: 20 | raise ValueError( 21 | "Can't skip %d rows, table only has %d" % (nb_rows, i), 22 | ) 23 | 24 | # Copy rest 25 | for row in src: 26 | dst.writerow(row) 27 | 28 | 29 | class SkipRowsConverter(SimpleConverter): 30 | """Adapter skipping a given number of rows from a CSV file. 31 | """ 32 | def __init__(self, writer, *, nb_rows): 33 | super(SkipRowsConverter, self).__init__(writer) 34 | self.nb_rows = nb_rows 35 | 36 | def transform(self, source_filename, dest_fileobj): 37 | skip_rows(source_filename, dest_fileobj, self.nb_rows) 38 | -------------------------------------------------------------------------------- /docs/python/datamart-profiler.rst: -------------------------------------------------------------------------------- 1 | Profiling library 2 | ================= 3 | 4 | This library can be used to profile datasets standalone. You can use it to profile datasets on your side and send that to Auctus for search, instead of uploading the whole dataset. It is also used internally by Auctus to process search-by-example queries (when sending a file to the ``/search`` endpoint) and to add datasets to the index (to be queried against later). 5 | 6 | Installing datamart-profiler 7 | ---------------------------- 8 | 9 | You can get it directly from the Python Package Index using PIP:: 10 | 11 | pip install datamart-profiler 12 | 13 | API 14 | --- 15 | 16 | The :py:func:`datamart_profiler.process_dataset` function is the entrypoint for the library. It returns a dict following Auctus's JSON result schema. 17 | 18 | .. autofunction:: datamart_profiler.core.process_dataset 19 | 20 | .. autofunction:: datamart_profiler.temporal.parse_date 21 | 22 | .. autofunction:: datamart_profiler.core.count_rows_to_skip 23 | 24 | Command-line usage 25 | ------------------ 26 | 27 | You can also use datamart-profiler from the command-line like so:: 28 | 29 | $ python -m datamart_profiler 30 | 31 | It will output the extracted metadata as JSON. 
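For example, profiling a local file from Python (a minimal sketch; ``data.csv`` is a placeholder file name)::

    import json

    import datamart_profiler

    # Profile the file; the result is a plain dict following the
    # result schema (column names, detected types, coverage, ...)
    metadata = datamart_profiler.process_dataset('data.csv')

    print(json.dumps(metadata, indent=2))

The equivalent command-line invocation is ``python -m datamart_profiler data.csv``.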
32 | -------------------------------------------------------------------------------- /frontend/src/components/SearchResults/DatasetSample.css: -------------------------------------------------------------------------------- 1 | /* https://www.colorbox.io/#steps=7#hue_start=209#hue_end=172#hue_curve=easeInQuad#sat_start=12#sat_end=90#sat_curve=easeOutCubic#sat_rate=130#lum_start=84#lum_end=53#lum_curve=easeOutQuad#minor_steps_map=0 */ 2 | .badge.semtype { 3 | background-color: #b1c4d5; 4 | color: white; 5 | } 6 | 7 | .badge.semtype.semtype-text { 8 | background-color: #aec2d4; 9 | } 10 | 11 | .badge.semtype.semtype-boolean { 12 | background-color: #a1bbce; 13 | } 14 | 15 | .badge.semtype.semtype-enumeration { 16 | background-color: #89b1c4; 17 | } 18 | 19 | .badge.semtype.semtype-identifier { 20 | background-color: #5fa4b5; 21 | } 22 | 23 | .badge.semtype.semtype-latitude, .badge.semtype.semtype-longitude { 24 | background-color: #249ca0; 25 | } 26 | 27 | .badge.semtype.semtype-datetime { 28 | background-color: #008775; 29 | } 30 | 31 | #vg-tooltip-element { 32 | z-index: 2000; 33 | } 34 | 35 | .chip-btn-download { 36 | margin-left: 0; 37 | margin-bottom: 0.1rem!important; 38 | margin-top: -3px; 39 | color: rgb(0, 0, 0, 0.5); 40 | cursor: pointer; 41 | user-select: none; 42 | -webkit-tap-highlight-color: transparent; 43 | } 44 | 45 | .chip-btn-download:hover { 46 | color: rgb(0, 0, 0, 1.0); 47 | } 48 | -------------------------------------------------------------------------------- /scripts/clear_caches.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """This script clears the cache folders safely. 4 | 5 | This should not result in any data being lost or affect any running process. 6 | """ 7 | 8 | import logging 9 | import os 10 | import sys 11 | 12 | from datamart_fslock.cache import clear_cache 13 | 14 | 15 | if __name__ == '__main__': 16 | logging.basicConfig(level=logging.INFO) 17 | 18 | if sys.argv[1:] == []: 19 | only_if_possible = False 20 | elif sys.argv[1:] == ['--if-possible']: 21 | only_if_possible = True 22 | else: 23 | print("Usage: clear_caches.py [--if-possible]", file=sys.stderr) 24 | sys.exit(2) 25 | 26 | if ( 27 | not os.path.isdir('/cache/datasets') or 28 | not os.path.isdir('/cache/aug') or 29 | not os.path.isdir('/cache/user_data') 30 | ): 31 | print( 32 | "Cache directories don't exist; are you not running this script " 33 | "inside Docker?", 34 | file=sys.stderr, 35 | ) 36 | sys.exit(1) 37 | clear_cache('/cache/datasets', only_if_possible=only_if_possible) 38 | clear_cache('/cache/aug', only_if_possible=only_if_possible) 39 | clear_cache('/cache/user_data', only_if_possible=only_if_possible) 40 | -------------------------------------------------------------------------------- /scripts/canonicalize_yaml.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """This script sorts YAML documents and objects to allow diffing. 4 | 5 | It loads multiple YAML files, orders the documents by 'metadata/kind' and 6 | 'metadata/name', sorts the keys of each objects alphabetically, and dumps it 7 | all to stdout. 8 | 9 | In addition, it also sorts the 'env:' list/map. 
10 | 11 | Usage: 12 | find yaml -type f -print0 | xargs -0 python canonicalize_yaml.py 13 | """ 14 | 15 | import sys 16 | import yaml 17 | 18 | 19 | def sort_env(obj): 20 | if isinstance(obj, list): 21 | return [sort_env(i) for i in obj] 22 | elif isinstance(obj, dict): 23 | return { 24 | k: ( 25 | sorted(v, key=lambda i: i['name']) if k == 'env' 26 | else sort_env(v) 27 | ) 28 | for k, v in obj.items() 29 | } 30 | else: 31 | return obj 32 | 33 | 34 | if __name__ == '__main__': 35 | objs = [] 36 | for filename in sys.argv[1:]: 37 | with open(filename, 'r') as fp_in: 38 | objs.extend(yaml.safe_load_all(fp_in)) 39 | 40 | objs = [sort_env(o) for o in objs] 41 | objs = sorted(objs, key=lambda o: (o['kind'], o['metadata']['name'])) 42 | 43 | yaml.safe_dump_all(objs, sys.stdout, sort_keys=True) 44 | -------------------------------------------------------------------------------- /lib_fslock/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | 5 | os.chdir(os.path.abspath(os.path.dirname(__file__))) 6 | 7 | 8 | req = [ 9 | 'prometheus_client', 10 | ] 11 | setup(name='datamart-fslock', 12 | version='2.1', 13 | packages=['datamart_fslock'], 14 | install_requires=req, 15 | description="Filesystem locking library for Auctus", 16 | author="Remi Rampin", 17 | author_email='remi.rampin@nyu.edu', 18 | maintainer="Remi Rampin", 19 | maintainer_email='remi.rampin@nyu.edu', 20 | url='https://gitlab.com/remram44/python-fslock', 21 | project_urls={ 22 | 'Homepage': 'https://gitlab.com/remram44/python-fslock', 23 | 'Source': 'https://gitlab.com/remram44/python-fslock', 24 | 'Tracker': 'https://gitlab.com/remram44/python-fslock/issues', 25 | }, 26 | long_description="Filesystem locking library for Auctus", 27 | license='MIT', 28 | keywords=['lock', 'flock', 'file lock', 'locking', 'filesystem'], 29 | classifiers=[ 30 | 'Development Status :: 5 - Production/Stable', 31 | 'Intended Audience :: Developers', 32 | 'License :: OSI Approved :: MIT License', 33 | 'Operating System :: POSIX', 34 | 'Programming Language :: Python :: 3 :: Only']) 35 | -------------------------------------------------------------------------------- /frontend/src/components/Badges/IconAbc.tsx: -------------------------------------------------------------------------------- 1 | import React, {SVGAttributes} from 'react'; 2 | 3 | interface Props extends SVGAttributes { 4 | color: string; 5 | size: string | number; 6 | } 7 | 8 | const IconAbc = (props: Props) => { 9 | const {color, size, ...otherProps} = props; 10 | return ( 11 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | ); 30 | }; 31 | 32 | IconAbc.defaultProps = { 33 | color: 'currentColor', 34 | size: '24', 35 | }; 36 | 37 | export {IconAbc}; 38 | -------------------------------------------------------------------------------- /frontend/src/components/Logo/Logo.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | // 3 | // auctus-logo.min.svg is a minified file generated from auctus-logo.svg 4 | // After updating source file, it can be with re-minified with: 5 | // npx svgo auctus-logo.svg -o auctus-logo.min.svg 6 | // 7 | import logo from './auctus-logo.min.svg'; 8 | import './Logo.css'; 9 | 10 | function VerticalLogo() { 11 | return ( 12 |
<div className="logo-vertical"> 13 | <img src={logo} alt="Auctus Logo" /> 14 | <span>Auctus Dataset Search</span> 15 | </div> 16 | ); 17 | } 18 | 19 | function HorizontalLogo(props: {onClick?: () => void}) { 20 | const style = props.onClick ? {cursor: 'pointer'} : undefined; 21 | return ( 22 | <div 23 | className="logo-horizontal" 24 | style={style} 25 | onClick={props.onClick} 26 | > 27 | <img src={logo} alt="Auctus Logo" /> 28 | <span>Auctus</span> 29 | </div> 30 | ); 31 | } 32 | 33 | function CenteredHorizontalLogo(props: {onClick?: () => void}) { 34 | return ( 35 | <div className="logo-centered-horizontal"> 36 | <HorizontalLogo onClick={props.onClick} /> 37 | </div>
38 | ); 39 | } 40 | 41 | export {VerticalLogo, HorizontalLogo, CenteredHorizontalLogo}; 42 | -------------------------------------------------------------------------------- /frontend/src/components/Chip/Chip.tsx: -------------------------------------------------------------------------------- 1 | import * as React from 'react'; 2 | import * as Icon from 'react-feather'; 3 | import './Chip.css'; 4 | 5 | interface ChipProps { 6 | label: string; 7 | onClose?: () => void; 8 | onEdit?: () => void; 9 | icon?: Icon.Icon; 10 | } 11 | 12 | function Chip(props: ChipProps) { 13 | let classes = 'chip chip-outline'; 14 | // chip-primary 15 | // chip-clickable 16 | if (props.onClose) { 17 | classes += ' chip-closeable'; 18 | } 19 | return ( 20 |
<div className={classes}> 21 | {props.icon && ( 22 | <div className="chip-icon"> 23 | <props.icon className="feather" /> 24 | </div> 25 | )} 26 | <span className="chip-label"> 27 | {props.label} 28 | &nbsp; 29 | {props.onEdit ? ( 30 | <Icon.Edit2 31 | className="feather feather-xs" 32 | onClick={props.onEdit} /> 33 | ) : ( 34 | '' 35 | )} 36 | </span> 37 | {props.onClose && ( 38 | <span className="chip-close" onClick={props.onClose}> 39 | <Icon.XCircle className="feather feather-xs" /> 40 | </span> 41 | )} 42 | </div> 43 | ); 44 | } 45 | 46 | function ChipGroup(props: React.PropsWithChildren<{}>) { 47 | return <div className="chip-group">{props.children}</div>
; 48 | } 49 | 50 | export {Chip, ChipGroup}; 51 | -------------------------------------------------------------------------------- /discovery/ckan/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | 5 | os.chdir(os.path.abspath(os.path.dirname(__file__))) 6 | 7 | 8 | req = [ 9 | 'elasticsearch~=7.0', 10 | 'requests', 11 | 'datamart-core', 12 | ] 13 | setup(name='datamart-ckan-discovery-service', 14 | version='0.0', 15 | py_modules=['ckan_discovery'], 16 | install_requires=req, 17 | description="CKAN discovery service for Auctus", 18 | author="Remi Rampin", 19 | author_email='remi.rampin@nyu.edu', 20 | maintainer="Remi Rampin", 21 | maintainer_email='remi.rampin@nyu.edu', 22 | url='https://gitlab.com/ViDA-NYU/auctus/auctus', 23 | project_urls={ 24 | 'Homepage': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 25 | 'Source': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 26 | 'Tracker': 'https://gitlab.com/ViDA-NYU/auctus/auctus/-/issues', 27 | }, 28 | long_description="CKAN discovery service for Auctus", 29 | license='Apache-2.0', 30 | keywords=['auctus', 'datamart'], 31 | classifiers=[ 32 | 'Development Status :: 4 - Beta', 33 | 'Intended Audience :: Science/Research', 34 | 'License :: OSI Approved :: Apache Software License', 35 | 'Operating System :: Unix', 36 | 'Programming Language :: Python :: 3 :: Only', 37 | 'Topic :: Scientific/Engineering :: Information Analysis']) 38 | -------------------------------------------------------------------------------- /discovery/zenodo/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | 5 | os.chdir(os.path.abspath(os.path.dirname(__file__))) 6 | 7 | 8 | req = [ 9 | 'elasticsearch~=7.0', 10 | 'requests', 11 | 'datamart-core', 12 | ] 13 | setup(name='datamart-zenodo-discovery-service', 14 | version='0.0', 15 | py_modules=['zenodo_discovery'], 16 | install_requires=req, 17 | description="Zenodo discovery service for Auctus", 18 | author="Remi Rampin", 19 | author_email='remi.rampin@nyu.edu', 20 | maintainer="Remi Rampin", 21 | maintainer_email='remi.rampin@nyu.edu', 22 | url='https://gitlab.com/ViDA-NYU/auctus/auctus', 23 | project_urls={ 24 | 'Homepage': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 25 | 'Source': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 26 | 'Tracker': 'https://gitlab.com/ViDA-NYU/auctus/auctus/-/issues', 27 | }, 28 | long_description="Zenodo discovery service for Auctus", 29 | license='Apache-2.0', 30 | keywords=['auctus', 'datamart'], 31 | classifiers=[ 32 | 'Development Status :: 4 - Beta', 33 | 'Intended Audience :: Science/Research', 34 | 'License :: OSI Approved :: Apache Software License', 35 | 'Operating System :: Unix', 36 | 'Programming Language :: Python :: 3 :: Only', 37 | 'Topic :: Scientific/Engineering :: Information Analysis']) 38 | -------------------------------------------------------------------------------- /lib_materialize/datamart_materialize/excel.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import csv 3 | from datetime import datetime 4 | import openpyxl 5 | 6 | from .common import UnsupportedConversion 7 | from .utils import SimpleConverter 8 | 9 | 10 | def xlsx_to_csv(source_filename, dest_fileobj): 11 | with contextlib.ExitStack() as stack: 12 | fp = stack.enter_context(open(source_filename, 'rb')) 13 | workbook = stack.enter_context(contextlib.closing( 14 | 
openpyxl.load_workbook(fp, read_only=True) 15 | )) 16 | 17 | sheets = workbook.worksheets 18 | if len(sheets) != 1: 19 | raise UnsupportedConversion( 20 | "Excel workbook has %d sheets" % len(sheets) 21 | ) 22 | sheet, = sheets 23 | 24 | writer = csv.writer(dest_fileobj) 25 | for values in sheet.iter_rows(values_only=True): 26 | values = [ 27 | # Avoid forced decimal point on integers 28 | '{0:g}'.format(v) if isinstance(v, float) 29 | # Decode dates into ISO-8601 strings 30 | else v.isoformat() if isinstance(v, datetime) 31 | else v 32 | for v in values 33 | ] 34 | 35 | writer.writerow(values) 36 | 37 | 38 | class ExcelConverter(SimpleConverter): 39 | """Adapter converting Excel files to CSV. 40 | """ 41 | transform = staticmethod(xlsx_to_csv) 42 | -------------------------------------------------------------------------------- /discovery/isi/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | 5 | os.chdir(os.path.abspath(os.path.dirname(__file__))) 6 | 7 | 8 | req = [ 9 | 'elasticsearch~=7.0', 10 | 'requests', 11 | 'datamart-core', 12 | ] 13 | setup(name='datamart-isi-discovery-service', 14 | version='0.0', 15 | py_modules=['isi_discovery'], 16 | install_requires=req, 17 | description="ISI Datamart discovery service for Auctus", 18 | author="Remi Rampin", 19 | author_email='remi.rampin@nyu.edu', 20 | maintainer="Remi Rampin", 21 | maintainer_email='remi.rampin@nyu.edu', 22 | url='https://gitlab.com/ViDA-NYU/auctus/auctus', 23 | project_urls={ 24 | 'Homepage': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 25 | 'Source': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 26 | 'Tracker': 'https://gitlab.com/ViDA-NYU/auctus/auctus/-/issues', 27 | }, 28 | long_description="ISI Datamart discovery service for Auctus", 29 | license='Apache-2.0', 30 | keywords=['auctus', 'datamart'], 31 | classifiers=[ 32 | 'Development Status :: 4 - Beta', 33 | 'Intended Audience :: Science/Research', 34 | 'License :: OSI Approved :: Apache Software License', 35 | 'Operating System :: Unix', 36 | 'Programming Language :: Python :: 3 :: Only', 37 | 'Topic :: Scientific/Engineering :: Information Analysis']) 38 | -------------------------------------------------------------------------------- /discovery/socrata/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | 5 | os.chdir(os.path.abspath(os.path.dirname(__file__))) 6 | 7 | 8 | req = [ 9 | 'elasticsearch~=7.0', 10 | 'sodapy', 11 | 'datamart-core', 12 | ] 13 | setup(name='datamart-socrata-discovery-service', 14 | version='0.0', 15 | py_modules=['socrata_discovery'], 16 | install_requires=req, 17 | description="Socrata discovery service for Auctus", 18 | author="Remi Rampin", 19 | author_email='remi.rampin@nyu.edu', 20 | maintainer="Remi Rampin", 21 | maintainer_email='remi.rampin@nyu.edu', 22 | url='https://gitlab.com/ViDA-NYU/auctus/auctus', 23 | project_urls={ 24 | 'Homepage': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 25 | 'Source': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 26 | 'Tracker': 'https://gitlab.com/ViDA-NYU/auctus/auctus/-/issues', 27 | }, 28 | long_description="Socrata discovery service for Auctus", 29 | license='Apache-2.0', 30 | keywords=['auctus', 'datamart'], 31 | classifiers=[ 32 | 'Development Status :: 4 - Beta', 33 | 'Intended Audience :: Science/Research', 34 | 'License :: OSI Approved :: Apache Software License', 35 | 'Operating System :: Unix', 36 | 
'Programming Language :: Python :: 3 :: Only', 37 | 'Topic :: Scientific/Engineering :: Information Analysis']) 38 | -------------------------------------------------------------------------------- /frontend/src/components/ui/DropdownMenu/DropdownMenu.tsx: -------------------------------------------------------------------------------- 1 | import * as React from 'react'; 2 | 3 | interface Props { 4 | children: (api: {onClick: () => void; active: boolean}) => JSX.Element; 5 | } 6 | 7 | interface State { 8 | active: boolean; 9 | } 10 | 11 | class DropdownMenu extends React.Component<Props, State> { 12 | ref: HTMLDivElement | null = null; 13 | 14 | constructor(props: Props) { 15 | super(props); 16 | this.state = {active: false}; 17 | this.toggleState = this.toggleState.bind(this); 18 | this.handleClickOutside = this.handleClickOutside.bind(this); 19 | } 20 | 21 | toggleState() { 22 | this.setState({active: !this.state.active}); 23 | } 24 | 25 | handleClickOutside(e: MouseEvent) { 26 | if (this.ref && !this.ref.contains(e.target as Node)) { 27 | if (this.state.active) { 28 | this.toggleState(); 29 | } 30 | } 31 | } 32 | 33 | componentDidMount() { 34 | document.addEventListener('mousedown', this.handleClickOutside, false); 35 | } 36 | 37 | componentWillUnmount() { 38 | document.removeEventListener('mousedown', this.handleClickOutside, false); 39 | } 40 | 41 | render() { 42 | return ( 43 |
<div ref={node => (this.ref = node)}> 44 | {this.props.children({ 45 | onClick: this.toggleState, 46 | active: this.state.active, 47 | })} 48 | </div>
49 | ); 50 | } 51 | } 52 | 53 | export {DropdownMenu}; 54 | -------------------------------------------------------------------------------- /frontend/src/components/ui/Tabs/Tabs.tsx: -------------------------------------------------------------------------------- 1 | import * as React from 'react'; 2 | import './Tabs.css'; 3 | 4 | class Tabs extends React.PureComponent { 5 | render() { 6 | return
<ul className="nav nav-tabs">{this.props.children}</ul>; 7 | } 8 | } 9 | 10 | interface TabProps { 11 | onClick: ((event: React.MouseEvent) => void) | undefined; 12 | selected: boolean; 13 | } 14 | 15 | class Tab extends React.PureComponent<TabProps> { 16 | render() { 17 | const tabClassName = this.props.selected ? 'nav-link active' : 'nav-link'; 18 | return ( 19 | <li className="nav-item"> 20 | <button className={tabClassName} onClick={this.props.onClick}> 21 | {this.props.children} 22 | </button> 23 | </li> 24 | ); 25 | } 26 | } 27 | 28 | class TabContent extends React.PureComponent { 29 | render() { 30 | return <div className="tab-content">{this.props.children}</div>; 31 | } 32 | } 33 | 34 | interface TabPaneProps { 35 | id: string; 36 | active: boolean; 37 | } 38 | 39 | class TabPane extends React.PureComponent<TabPaneProps> { 40 | render() { 41 | const tabPaneClassName = this.props.active 42 | ? 'tab-pane fade show active' 43 | : 'tab-pane fade'; 44 | return ( 45 | <div className={tabPaneClassName} id={this.props.id}> 46 | {this.props.children} 47 | </div>
    48 | ); 49 | } 50 | } 51 | 52 | export {Tabs, Tab, TabContent, TabPane}; 53 | -------------------------------------------------------------------------------- /contrib/k8s/secrets.jsonnet: -------------------------------------------------------------------------------- 1 | // Set 'private_app: true' in the config to password-protect frontend & API 2 | // You can create this file using the htpasswd tool 3 | local private_app_password = ||| 4 | auctus:$apr1$ECD/OaHB$CMBSkoEdcA/2uX8gPZM3y1 5 | |||; 6 | 7 | local amqp_user = 'auctususer'; 8 | local amqp_password = 'auctuspassword'; 9 | local admin_password = 'auctuspassword'; 10 | local s3_key = 'devkey'; 11 | local s3_secret = 'devpassword'; 12 | local gcs_creds = std.base64(''); 13 | 14 | { 15 | 'secrets.yml': std.manifestYamlStream([ 16 | { 17 | apiVersion: 'v1', 18 | kind: 'Secret', 19 | type: 'Opaque', 20 | metadata: { 21 | name: 'secrets', 22 | }, 23 | local data = { 24 | 'amqp.user': amqp_user, 25 | 'amqp.password': amqp_password, 26 | 'admin.password': admin_password, 27 | 's3.key': s3_key, 28 | 's3.secret': s3_secret, 29 | 'gcs.creds': gcs_creds, 30 | 'smtp.user': 'auctususer', 31 | 'smtp.password': 'auctuspassword', 32 | }, 33 | data: { 34 | [k]: std.base64(data[k]) 35 | for k in std.objectFields(data) 36 | }, 37 | }, 38 | { 39 | apiVersion: 'v1', 40 | kind: 'Secret', 41 | type: 'Opaque', 42 | metadata: { 43 | name: 'basic-auth', 44 | }, 45 | data: { 46 | auth: std.base64(private_app_password), 47 | }, 48 | }, 49 | ]), 50 | } 51 | -------------------------------------------------------------------------------- /apiserver/apiserver/enhance_metadata.py: -------------------------------------------------------------------------------- 1 | from datamart_materialize.d3m import d3m_metadata 2 | 3 | 4 | def enhance_metadata(result): 5 | """Add more metadata (e.g. D3M) from the original metadata. 
6 | 7 | :param result: A dict with 'id' and 'metadata' keys 8 | :type result: dict 9 | :return: A dict with the 'metadata' key and additional keys such as 10 | 'd3m-metadata' 11 | """ 12 | # Generate metadata in D3M format 13 | result = dict( 14 | result, 15 | d3m_dataset_description=d3m_metadata(result['id'], result['metadata']), 16 | ) 17 | 18 | # Add temporal coverage information to columns for compatibility 19 | if result['metadata'].get('temporal_coverage'): 20 | columns = list(result['metadata']['columns']) 21 | for temporal in result['metadata']['temporal_coverage']: 22 | # Only works for temporal coverage extracted from a single column 23 | if len(temporal['column_indexes']) == 1: 24 | idx = temporal['column_indexes'][0] 25 | columns[idx] = dict( 26 | columns[idx], 27 | coverage=temporal['ranges'], 28 | ) 29 | if 'temporal_resolution' in temporal: 30 | columns[idx]['temporal_resolution'] = \ 31 | temporal['temporal_resolution'] 32 | 33 | result['metadata'] = dict(result['metadata'], columns=columns) 34 | 35 | return result 36 | -------------------------------------------------------------------------------- /tests/data/hourly.csv: -------------------------------------------------------------------------------- 1 | aug_date,rain 2 | 2019-06-12T00:00:00,no 3 | 2019-06-12T01:00:00,no 4 | 2019-06-12T02:00:00,no 5 | 2019-06-12T03:00:00,yes 6 | 2019-06-12T04:00:00,no 7 | 2019-06-12T05:00:00,yes 8 | 2019-06-12T06:00:00,yes 9 | 2019-06-12T07:00:00,no 10 | 2019-06-12T08:00:00,no 11 | 2019-06-12T09:00:00,no 12 | 2019-06-12T10:00:00,yes 13 | 2019-06-12T11:00:00,yes 14 | 2019-06-12T12:00:00,yes 15 | 2019-06-12T13:00:00,yes 16 | 2019-06-12T14:00:00,yes 17 | 2019-06-12T15:00:00,no 18 | 2019-06-12T16:00:00,no 19 | 2019-06-12T17:00:00,yes 20 | 2019-06-12T18:00:00,yes 21 | 2019-06-12T19:00:00,yes 22 | 2019-06-12T20:00:00,yes 23 | 2019-06-12T21:00:00,yes 24 | 2019-06-12T22:00:00,no 25 | 2019-06-12T23:00:00,no 26 | 2019-06-13T00:00:00,no 27 | 2019-06-13T01:00:00,no 28 | 2019-06-13T02:00:00,no 29 | 2019-06-13T03:00:00,no 30 | 2019-06-13T04:00:00,yes 31 | 2019-06-13T05:00:00,no 32 | 2019-06-13T06:00:00,no 33 | 2019-06-13T07:00:00,yes 34 | 2019-06-13T08:00:00,no 35 | 2019-06-13T09:00:00,yes 36 | 2019-06-13T10:00:00,yes 37 | 2019-06-13T11:00:00,yes 38 | 2019-06-13T12:00:00,yes 39 | 2019-06-13T13:00:00,yes 40 | 2019-06-13T14:00:00,yes 41 | 2019-06-13T15:00:00,no 42 | 2019-06-13T16:00:00,yes 43 | 2019-06-13T17:00:00,yes 44 | 2019-06-13T18:00:00,no 45 | 2019-06-13T19:00:00,yes 46 | 2019-06-13T20:00:00,yes 47 | 2019-06-13T21:00:00,yes 48 | 2019-06-13T22:00:00,yes 49 | 2019-06-13T23:00:00,yes 50 | 2019-06-14T00:00:00,yes 51 | 2019-06-14T01:00:00,yes 52 | 2019-06-14T02:00:00,yes 53 | 2019-06-14T03:00:00,yes 54 | -------------------------------------------------------------------------------- /discovery/noaa/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | 5 | os.chdir(os.path.abspath(os.path.dirname(__file__))) 6 | 7 | 8 | req = [ 9 | 'elasticsearch~=7.0', 10 | 'requests', 11 | 'datamart-core', 12 | ] 13 | setup(name='datamart-noaa-discovery-service', 14 | version='0.0', 15 | packages=['noaa_discovery'], 16 | package_data={'noaa_discovery': [ 17 | 'noaa_city_stations.csv', 18 | ]}, 19 | install_requires=req, 20 | description="NOAA discovery service for Auctus", 21 | author="Remi Rampin", 22 | author_email='remi.rampin@nyu.edu', 23 | maintainer="Remi Rampin", 24 | maintainer_email='remi.rampin@nyu.edu', 25 | 
url='https://gitlab.com/ViDA-NYU/auctus/auctus', 26 | project_urls={ 27 | 'Homepage': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 28 | 'Source': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 29 | 'Tracker': 'https://gitlab.com/ViDA-NYU/auctus/auctus/-/issues', 30 | }, 31 | long_description="NOAA discovery service for Auctus", 32 | license='Apache-2.0', 33 | keywords=['auctus', 'datamart'], 34 | classifiers=[ 35 | 'Development Status :: 4 - Beta', 36 | 'Intended Audience :: Science/Research', 37 | 'License :: OSI Approved :: Apache Software License', 38 | 'Operating System :: Unix', 39 | 'Programming Language :: Python :: 3 :: Only', 40 | 'Topic :: Scientific/Engineering :: Information Analysis']) 41 | -------------------------------------------------------------------------------- /lib_materialize/datamart_materialize/excel97.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import xlrd 3 | import xlrd.sheet 4 | 5 | from .common import UnsupportedConversion 6 | from .utils import SimpleConverter 7 | 8 | 9 | def xls_to_csv(source_filename, dest_fileobj): 10 | with xlrd.open_workbook(source_filename) as workbook: 11 | datemode = workbook.datemode 12 | sheets = workbook.sheets() 13 | if len(sheets) != 1: 14 | raise UnsupportedConversion( 15 | "Excel workbook has %d sheets" % len(sheets) 16 | ) 17 | sheet, = sheets 18 | 19 | writer = csv.writer(dest_fileobj) 20 | for row_num in range(sheet.nrows): 21 | values = sheet.row_values(row_num) 22 | 23 | for col_num, cell_type in enumerate(sheet.row_types(row_num)): 24 | if cell_type == xlrd.sheet.XL_CELL_DATE: 25 | # Decode dates into ISO-8601 strings 26 | values[col_num] = xlrd.xldate_as_datetime( 27 | values[col_num], 28 | datemode, 29 | ).isoformat() 30 | elif cell_type == xlrd.sheet.XL_CELL_NUMBER: 31 | # Avoid forced decimal point on integers 32 | values[col_num] = '{0:g}'.format(values[col_num]) 33 | 34 | writer.writerow(values) 35 | 36 | 37 | class Excel97Converter(SimpleConverter): 38 | """Adapter converting Excel files to CSV. 
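The single sheet is written out as CSV, with date cells decoded to ISO-8601 strings and numeric cells formatted without a forced decimal point; workbooks with more than one sheet raise UnsupportedConversion.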
39 | """ 40 | transform = staticmethod(xls_to_csv) 41 | -------------------------------------------------------------------------------- /discovery/worldbank/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | 5 | os.chdir(os.path.abspath(os.path.dirname(__file__))) 6 | 7 | 8 | req = [ 9 | 'elasticsearch~=7.0', 10 | 'beautifulsoup4[html5lib]', 11 | 'pandas', 12 | 'datamart-core', 13 | 'datamart-profiler', 14 | ] 15 | setup(name='datamart-worldbank-discovery-service', 16 | version='0.0', 17 | py_modules=['worldbank_discovery'], 18 | install_requires=req, 19 | description="World Bank indicator discovery service for Auctus", 20 | author="Remi Rampin", 21 | author_email='remi.rampin@nyu.edu', 22 | maintainer="Remi Rampin", 23 | maintainer_email='remi.rampin@nyu.edu', 24 | url='https://gitlab.com/ViDA-NYU/auctus/auctus', 25 | project_urls={ 26 | 'Homepage': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 27 | 'Source': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 28 | 'Tracker': 'https://gitlab.com/ViDA-NYU/auctus/auctus/-/issues', 29 | }, 30 | long_description="World Bank indicator discovery service for Auctus", 31 | license='Apache-2.0', 32 | keywords=['auctus', 'datamart'], 33 | classifiers=[ 34 | 'Development Status :: 4 - Beta', 35 | 'Intended Audience :: Science/Research', 36 | 'License :: OSI Approved :: Apache Software License', 37 | 'Operating System :: Unix', 38 | 'Programming Language :: Python :: 3 :: Only', 39 | 'Topic :: Scientific/Engineering :: Information Analysis']) 40 | -------------------------------------------------------------------------------- /discovery/uaz_indicators/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | 5 | os.chdir(os.path.abspath(os.path.dirname(__file__))) 6 | 7 | 8 | req = [ 9 | 'requests', 10 | 'datamart-core', 11 | ] 12 | setup(name='datamart-uaz-indicators-service', 13 | version='0.0', 14 | py_modules=['uaz_indicators'], 15 | install_requires=req, 16 | description="Auctus discovery service for indicators from the " + 17 | "University of Arizona", 18 | author="Remi Rampin", 19 | author_email='remi.rampin@nyu.edu', 20 | maintainer="Remi Rampin", 21 | maintainer_email='remi.rampin@nyu.edu', 22 | url='https://gitlab.com/ViDA-NYU/auctus/auctus', 23 | project_urls={ 24 | 'Homepage': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 25 | 'Source': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 26 | 'Tracker': 'https://gitlab.com/ViDA-NYU/auctus/auctus/-/issues', 27 | }, 28 | long_description="Auctus discovery service for indicators from the " + 29 | "University of Arizona", 30 | license='Apache-2.0', 31 | keywords=['auctus', 'datamart'], 32 | classifiers=[ 33 | 'Development Status :: 4 - Beta', 34 | 'Intended Audience :: Science/Research', 35 | 'License :: OSI Approved :: Apache Software License', 36 | 'Operating System :: Unix', 37 | 'Programming Language :: Python :: 3 :: Only', 38 | 'Topic :: Scientific/Engineering :: Information Analysis']) 39 | -------------------------------------------------------------------------------- /lib_augmentation/setup.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | from setuptools import setup 4 | 5 | 6 | os.chdir(os.path.abspath(os.path.dirname(__file__))) 7 | 8 | 9 | req = [ 10 | 'pandas', 11 | 'numpy', 12 | 'datamart-materialize==0.11', 13 | 'datamart-profiler==0.11', 14 | 
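    # exact '==' pins: this package consumes the output formats of both, which may change between releases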
] 15 | with io.open('README.rst', encoding='utf-8') as fp: 16 | description = fp.read() 17 | setup(name='datamart-augmentation', 18 | version='0.10', 19 | packages=['datamart_augmentation'], 20 | install_requires=req, 21 | description="Data augmentation functions for Auctus", 22 | author="Remi Rampin", 23 | author_email='remi.rampin@nyu.edu', 24 | maintainer="Remi Rampin", 25 | maintainer_email='remi.rampin@nyu.edu', 26 | url='https://gitlab.com/ViDA-NYU/auctus/auctus', 27 | project_urls={ 28 | 'Homepage': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 29 | 'Source': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 30 | 'Tracker': 'https://gitlab.com/ViDA-NYU/auctus/auctus/-/issues', 31 | }, 32 | long_description=description, 33 | license='Apache-2.0', 34 | keywords=['auctus', 'datamart'], 35 | classifiers=[ 36 | 'Development Status :: 4 - Beta', 37 | 'Intended Audience :: Science/Research', 38 | 'License :: OSI Approved :: Apache Software License', 39 | 'Operating System :: OS Independent', 40 | 'Programming Language :: Python :: 3 :: Only', 41 | 'Topic :: Scientific/Engineering :: Information Analysis']) 42 | -------------------------------------------------------------------------------- /frontend/src/components/DateFilter/DateFilter.css: -------------------------------------------------------------------------------- 1 | .react-datepicker__input-container input { 2 | border: 1px solid #ced4da; 3 | padding: 6px 10px; 4 | } 5 | 6 | 7 | .react-datepicker__day--in-range:hover, 8 | .react-datepicker__day--in-selecting-range:hover, 9 | .react-datepicker__day--selected:hover, 10 | .react-datepicker__month-text--in-range:hover, 11 | .react-datepicker__month-text--in-selecting-range:hover, 12 | .react-datepicker__month-text--selected:hover, 13 | .react-datepicker__quarter-text--in-range:hover, 14 | .react-datepicker__quarter-text--in-selecting-range:hover, 15 | .react-datepicker__quarter-text--selected:hover { 16 | background-color: #2e1b59; 17 | } 18 | 19 | .react-datepicker__day--keyboard-selected:hover, 20 | .react-datepicker__month-text--keyboard-selected:hover, 21 | .react-datepicker__quarter-text--keyboard-selected:hover { 22 | background-color: #2e1b59; 23 | } 24 | 25 | .react-datepicker__day--in-range, 26 | .react-datepicker__day--in-selecting-range, 27 | .react-datepicker__day--selected, 28 | .react-datepicker__month-text--in-range, 29 | .react-datepicker__month-text--in-selecting-range, 30 | .react-datepicker__month-text--selected, 31 | .react-datepicker__quarter-text--in-range, 32 | .react-datepicker__quarter-text--in-selecting-range, 33 | .react-datepicker__quarter-text--selected { 34 | background-color: #63508b; 35 | } 36 | 37 | .react-datepicker__day--keyboard-selected, 38 | .react-datepicker__month-text--keyboard-selected, 39 | .react-datepicker__quarter-text--keyboard-selected { 40 | background-color: #63508b; 41 | } 42 | -------------------------------------------------------------------------------- /lib_core/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | 5 | os.chdir(os.path.abspath(os.path.dirname(__file__))) 6 | 7 | 8 | req = [ 9 | 'aio-pika', 10 | 'elasticsearch~=7.0', 11 | 'lazo-index-service==0.7.0', 12 | 's3fs', 13 | 'gcsfs', 14 | 'advocate>=1.0,<2', 15 | 'prometheus_client', 16 | 'prometheus-async', 17 | 'sentry-sdk', 18 | ] 19 | setup(name='datamart-core', 20 | version='0.0', 21 | packages=['datamart_core'], 22 | install_requires=req, 23 | description="Core library for Auctus 
services", 24 | author="Remi Rampin", 25 | author_email='remi.rampin@nyu.edu', 26 | maintainer="Remi Rampin", 27 | maintainer_email='remi.rampin@nyu.edu', 28 | url='https://gitlab.com/ViDA-NYU/auctus/auctus', 29 | project_urls={ 30 | 'Homepage': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 31 | 'Source': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 32 | 'Tracker': 'https://gitlab.com/ViDA-NYU/auctus/auctus/-/issues', 33 | }, 34 | long_description="Core library for Auctus services", 35 | license='Apache-2.0', 36 | keywords=['auctus', 'datamart'], 37 | classifiers=[ 38 | 'Development Status :: 4 - Beta', 39 | 'Intended Audience :: Science/Research', 40 | 'License :: OSI Approved :: Apache Software License', 41 | 'Operating System :: OS Independent', 42 | 'Programming Language :: Python :: 3 :: Only', 43 | 'Topic :: Scientific/Engineering :: Information Analysis']) 44 | -------------------------------------------------------------------------------- /snapshotter/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | 5 | os.chdir(os.path.abspath(os.path.dirname(__file__))) 6 | 7 | 8 | req = [ 9 | 'prometheus_client', 10 | 'datamart-core', 11 | ] 12 | setup(name='datamart-snapshotter-service', 13 | version='0.0', 14 | packages=['snapshotter'], 15 | entry_points={ 16 | 'console_scripts': [ 17 | 'snapshotter = snapshotter.snapshot:main']}, 18 | install_requires=req, 19 | description="Snapshotter service for Auctus", 20 | author="Remi Rampin", 21 | author_email='remi.rampin@nyu.edu', 22 | maintainer="Remi Rampin", 23 | maintainer_email='remi.rampin@nyu.edu', 24 | url='https://gitlab.com/ViDA-NYU/auctus/auctus', 25 | project_urls={ 26 | 'Homepage': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 27 | 'Source': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 28 | 'Tracker': 'https://gitlab.com/ViDA-NYU/auctus/auctus/-/issues', 29 | }, 30 | long_description="Snapshotter service for Auctus", 31 | license='Apache-2.0', 32 | keywords=['auctus', 'datamart'], 33 | classifiers=[ 34 | 'Development Status :: 4 - Beta', 35 | 'Intended Audience :: Science/Research', 36 | 'License :: OSI Approved :: Apache Software License', 37 | 'Operating System :: Unix', 38 | 'Programming Language :: JavaScript', 39 | 'Programming Language :: Python :: 3 :: Only', 40 | 'Topic :: Scientific/Engineering :: Information Analysis']) 41 | -------------------------------------------------------------------------------- /cache_cleaner/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | 5 | os.chdir(os.path.abspath(os.path.dirname(__file__))) 6 | 7 | 8 | req = [ 9 | 'prometheus_client', 10 | 'datamart-core', 11 | ] 12 | setup(name='datamart-cache-cleaner-service', 13 | version='0.0', 14 | packages=['cache_cleaner'], 15 | entry_points={ 16 | 'console_scripts': [ 17 | 'cache_cleaner = cache_cleaner.cache:main']}, 18 | install_requires=req, 19 | description="Cache Cleaner service for Auctus", 20 | author="Remi Rampin", 21 | author_email='remi.rampin@nyu.edu', 22 | maintainer="Remi Rampin", 23 | maintainer_email='remi.rampin@nyu.edu', 24 | url='https://gitlab.com/ViDA-NYU/auctus/auctus', 25 | project_urls={ 26 | 'Homepage': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 27 | 'Source': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 28 | 'Tracker': 'https://gitlab.com/ViDA-NYU/auctus/auctus/-/issues', 29 | }, 30 | long_description="Cache Cleaner service for Auctus", 
31 | license='Apache-2.0', 32 | keywords=['auctus', 'datamart'], 33 | classifiers=[ 34 | 'Development Status :: 4 - Beta', 35 | 'Intended Audience :: Science/Research', 36 | 'License :: OSI Approved :: Apache Software License', 37 | 'Operating System :: Unix', 38 | 'Programming Language :: JavaScript', 39 | 'Programming Language :: Python :: 3 :: Only', 40 | 'Topic :: Scientific/Engineering :: Information Analysis']) 41 | -------------------------------------------------------------------------------- /scripts/purge_source.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """This script deletes all the datasets in the index from a specific source. 4 | """ 5 | 6 | import lazo_index_service 7 | import logging 8 | import os 9 | import sys 10 | 11 | from datamart_core.common import PrefixedElasticsearch, \ 12 | delete_dataset_from_index 13 | 14 | 15 | SIZE = 10000 16 | 17 | 18 | def clear(source): 19 | es = PrefixedElasticsearch() 20 | lazo_client = lazo_index_service.LazoIndexClient( 21 | host=os.environ['LAZO_SERVER_HOST'], 22 | port=int(os.environ['LAZO_SERVER_PORT']) 23 | ) 24 | hits = es.scan( 25 | index='datasets,pending', 26 | query={ 27 | 'query': { 28 | 'bool': { 29 | 'should': [ 30 | { 31 | 'term': { 32 | 'materialize.identifier': source, 33 | }, 34 | }, 35 | { 36 | 'term': { 37 | 'source': source, 38 | }, 39 | }, 40 | ], 41 | 'minimum_should_match': 1, 42 | }, 43 | }, 44 | }, 45 | _source=False, 46 | size=SIZE, 47 | ) 48 | for h in hits: 49 | delete_dataset_from_index(es, h['_id'], lazo_client) 50 | 51 | 52 | if __name__ == '__main__': 53 | logging.basicConfig(level=logging.INFO) 54 | 55 | clear(sys.argv[1]) 56 | -------------------------------------------------------------------------------- /docker/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 15s 3 | 4 | scrape_configs: 5 | - job_name: prometheus 6 | static_configs: 7 | - targets: ["localhost:9090"] 8 | - job_name: elasticsearch 9 | scrape_interval: 30s 10 | scrape_timeout: 10s 11 | static_configs: 12 | - targets: ["elasticsearch-exporter:9114"] 13 | - job_name: rabbitmq 14 | scrape_timeout: 5s 15 | metrics_path: /metrics 16 | static_configs: 17 | - targets: ["rabbitmq:15692"] 18 | - job_name: haproxy 19 | scrape_timeout: 5s 20 | metrics_path: /metrics 21 | static_configs: 22 | - targets: ['apilb:8000'] 23 | - job_name: apiserver 24 | dns_sd_configs: 25 | - names: 26 | - apiserver 27 | type: "A" 28 | port: 8000 29 | refresh_interval: 60s 30 | - job_name: coordinator 31 | static_configs: 32 | - targets: ["coordinator:8000"] 33 | - job_name: cache-cleaner 34 | dns_sd_configs: 35 | - names: 36 | - cache-cleaner 37 | type: "A" 38 | port: 8000 39 | refresh_interval: 60s 40 | - job_name: profiler 41 | dns_sd_configs: 42 | - names: 43 | - profiler 44 | type: "A" 45 | port: 8000 46 | refresh_interval: 60s 47 | - job_name: lazo 48 | dns_sd_configs: 49 | - names: 50 | - lazo 51 | type: "A" 52 | port: 8000 53 | refresh_interval: 60s 54 | - job_name: nominatim 55 | scrape_timeout: 5s 56 | metrics_path: /metrics 57 | static_configs: 58 | - targets: ["nominatim"] 59 | -------------------------------------------------------------------------------- /lib_profiler/setup.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | from setuptools import setup 4 | 5 | 6 | os.chdir(os.path.abspath(os.path.dirname(__file__))) 7 | 8 | 9 | req = 
[ 10 | 'numpy', 11 | 'opentelemetry-api', 12 | 'pandas', 13 | 'prometheus_client', 14 | 'python-dateutil', 15 | 'scikit-learn', 16 | 'regex', 17 | 'requests', 18 | 'datamart-geo>=0.2.3,<0.4', 19 | ] 20 | with io.open('README.rst', encoding='utf-8') as fp: 21 | description = fp.read() 22 | setup(name='datamart-profiler', 23 | version='0.11', 24 | packages=['datamart_profiler'], 25 | install_requires=req, 26 | description="Data profiling library for Auctus", 27 | author="Remi Rampin", 28 | author_email='remi.rampin@nyu.edu', 29 | maintainer="Remi Rampin", 30 | maintainer_email='remi.rampin@nyu.edu', 31 | url='https://gitlab.com/ViDA-NYU/auctus/auctus', 32 | project_urls={ 33 | 'Homepage': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 34 | 'Source': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 35 | 'Tracker': 'https://gitlab.com/ViDA-NYU/auctus/auctus/-/issues', 36 | }, 37 | long_description=description, 38 | license='Apache-2.0', 39 | keywords=['auctus', 'datamart'], 40 | classifiers=[ 41 | 'Development Status :: 4 - Beta', 42 | 'Intended Audience :: Science/Research', 43 | 'License :: OSI Approved :: Apache Software License', 44 | 'Operating System :: OS Independent', 45 | 'Programming Language :: Python :: 3 :: Only', 46 | 'Topic :: Scientific/Engineering :: Information Analysis']) 47 | -------------------------------------------------------------------------------- /scripts/migrate-source-url.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """This script adds the source_url for Socrata datasets. 4 | """ 5 | 6 | import json 7 | import os 8 | import shutil 9 | import sys 10 | 11 | 12 | def migrate(from_folder, to_folder): 13 | assert os.listdir(from_folder) 14 | assert not os.listdir(to_folder) 15 | 16 | datasets = [] 17 | lazo = [] 18 | for f in os.listdir(from_folder): 19 | if f.startswith('lazo.'): 20 | lazo.append(f) 21 | else: 22 | datasets.append(f) 23 | 24 | for i, dataset in enumerate(datasets): 25 | if i % 100 == 0: 26 | print("% 5d / %5d datasets processed" % (i, len(datasets))) 27 | 28 | with open(os.path.join(from_folder, dataset)) as fp: 29 | obj = json.load(fp) 30 | 31 | if obj['materialize']['identifier'] == 'datamart.socrata': 32 | if 'source_url' not in obj: 33 | obj['source_url'] = 'https://%s/_/_/%s' % ( 34 | obj['materialize']['socrata_domain'], 35 | obj['materialize']['socrata_id'], 36 | ) 37 | 38 | with open(os.path.join(to_folder, dataset), 'w') as fp: 39 | json.dump(obj, fp, sort_keys=True, indent=2) 40 | 41 | print("Copying lazo data...") 42 | for i, f in enumerate(lazo): 43 | if i % 1000 == 0: 44 | print("% 5d / %5d files copied" % (i, len(lazo))) 45 | shutil.copy2( 46 | os.path.join(from_folder, f), 47 | os.path.join(to_folder, f), 48 | ) 49 | 50 | 51 | if __name__ == '__main__': 52 | migrate(sys.argv[1], sys.argv[2]) 53 | -------------------------------------------------------------------------------- /scripts/migrate-point-format.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """This script updates the index for !166. 4 | 5 | It adds the column "point_format" information (the default one, "long,lat").
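Only columns whose structural_type is GeoCoordinates and that do not already carry the key are updated; everything else is copied through unchanged.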
6 | """ 7 | 8 | import json 9 | import os 10 | import shutil 11 | import sys 12 | 13 | 14 | def migrate(from_folder, to_folder): 15 | assert os.listdir(from_folder) 16 | assert not os.listdir(to_folder) 17 | 18 | datasets = [] 19 | lazo = [] 20 | for f in os.listdir(from_folder): 21 | if f.startswith('lazo.'): 22 | lazo.append(f) 23 | else: 24 | datasets.append(f) 25 | 26 | for i, dataset in enumerate(datasets): 27 | if i % 100 == 0: 28 | print("% 5d / %5d datasets processed" % (i, len(datasets))) 29 | 30 | with open(os.path.join(from_folder, dataset)) as fp: 31 | obj = json.load(fp) 32 | 33 | for column in obj['columns']: 34 | if ( 35 | column['structural_type'] == 'http://schema.org/GeoCoordinates' 36 | and 'point_format' not in column 37 | ): 38 | column['point_format'] = 'long,lat' 39 | 40 | with open(os.path.join(to_folder, dataset), 'w') as fp: 41 | json.dump(obj, fp, sort_keys=True, indent=2) 42 | 43 | print("Copying lazo data...") 44 | for i, f in enumerate(lazo): 45 | if i % 1000 == 0: 46 | print("% 5d / %5d files copied" % (i, len(lazo))) 47 | shutil.copy2( 48 | os.path.join(from_folder, f), 49 | os.path.join(to_folder, f), 50 | ) 51 | 52 | 53 | if __name__ == '__main__': 54 | migrate(sys.argv[1], sys.argv[2]) 55 | -------------------------------------------------------------------------------- /tests/test_common.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from datamart_core import common 4 | 5 | 6 | class TestDatasetIdEncoding(unittest.TestCase): 7 | def test_encode(self): 8 | """Test encoding a dataset ID to a file name""" 9 | self.assertEqual( 10 | common.encode_dataset_id('datamart_contrived/dataset#id;'), 11 | 'datamart__contrived_2Fdataset_23id_3B', 12 | ) 13 | 14 | def test_decode(self): 15 | """Test decoding a file name to a dataset ID""" 16 | self.assertEqual( 17 | common.decode_dataset_id('datamart__contrived_2Fdataset_23id_3B'), 18 | 'datamart_contrived/dataset#id;', 19 | ) 20 | 21 | 22 | class TestStripHtml(unittest.TestCase): 23 | def test_strip(self): 24 | """Strip HTML from text""" 25 | self.assertEqual( 26 | common.strip_html( 27 | "
<p>" 28 | "Text &amp; <b>tags</b> &amp; HTML</p>
    ", 29 | ), 30 | "Text & tags & HTML", 31 | ) 32 | 33 | def test_link(self): 34 | """Keep link targets""" 35 | self.assertEqual( 36 | common.strip_html( 37 | "Some links here: " 38 | + "google.com", 39 | ), 40 | "Some links (https://google.com/) here: google.com", 41 | ) 42 | 43 | def test_unknown(self): 44 | """Unknown tags should be preserved""" 45 | self.assertEqual( 46 | common.strip_html( 47 | "Run python ", 48 | ), 49 | "Run python ", 50 | ) 51 | -------------------------------------------------------------------------------- /contrib/k8s/snapshotter.libsonnet: -------------------------------------------------------------------------------- 1 | local utils = import 'utils.libsonnet'; 2 | 3 | function( 4 | config, 5 | schedule='20 0 * * 5', 6 | ) { 7 | 'snapshotter-cronjob': config.kube('batch/v1beta1', 'CronJob', { 8 | file:: 'snapshotter.yml', 9 | metadata: { 10 | name: 'snapshotter', 11 | labels: { 12 | app: 'auctus', 13 | what: 'snapshotter', 14 | }, 15 | }, 16 | spec: { 17 | schedule: schedule, 18 | jobTemplate: { 19 | metadata: { 20 | labels: { 21 | app: 'auctus', 22 | what: 'snapshotter', 23 | }, 24 | }, 25 | spec: { 26 | template: { 27 | metadata: { 28 | labels: { 29 | app: 'auctus', 30 | what: 'snapshotter', 31 | }, 32 | }, 33 | spec: { 34 | restartPolicy: 'Never', 35 | securityContext: { 36 | runAsNonRoot: true, 37 | }, 38 | containers: [ 39 | { 40 | name: 'snapshotter', 41 | image: config.image, 42 | imagePullPolicy: 'IfNotPresent', 43 | args: ['snapshotter'], 44 | env: utils.env( 45 | { 46 | LOG_FORMAT: config.log_format, 47 | ELASTICSEARCH_HOSTS: 'elasticsearch:9200', 48 | ELASTICSEARCH_PREFIX: config.elasticsearch.prefix, 49 | } 50 | + utils.object_store_env(config.object_store) 51 | ), 52 | }, 53 | ], 54 | }, 55 | }, 56 | }, 57 | }, 58 | }, 59 | }), 60 | } 61 | -------------------------------------------------------------------------------- /contrib/k8s/auctus.libsonnet: -------------------------------------------------------------------------------- 1 | local app = import 'app.libsonnet'; 2 | local ckan = import 'discovery/ckan.libsonnet'; 3 | local socrata = import 'discovery/socrata.libsonnet'; 4 | local test_discoverer = import 'discovery/test.libsonnet'; 5 | local uaz_indicators = import 'discovery/uaz-indicators.libsonnet'; 6 | local worldbank = import 'discovery/worldbank.libsonnet'; 7 | local zenodo = import 'discovery/zenodo.libsonnet'; 8 | local elasticsearch = import 'elasticsearch.libsonnet'; 9 | local ingress = import 'ingress.libsonnet'; 10 | local jaeger = import 'jaeger.libsonnet'; 11 | local minio = import 'minio.libsonnet'; 12 | local monitoring = import 'monitoring.libsonnet'; 13 | local nominatim = import 'nominatim.libsonnet'; 14 | local rabbitmq = import 'rabbitmq.libsonnet'; 15 | local redis = import 'redis.libsonnet'; 16 | local snapshotter = import 'snapshotter.libsonnet'; 17 | local volumes_local = import 'volumes-local.libsonnet'; 18 | local volumes = import 'volumes.libsonnet'; 19 | 20 | function(config) ( 21 | local data = ( 22 | {} 23 | + redis(config) 24 | + elasticsearch(config) 25 | + rabbitmq(config) 26 | + nominatim(config) 27 | + app(config) 28 | + snapshotter(config) 29 | + ingress(config) 30 | + minio(config) 31 | + monitoring(config) 32 | + jaeger(config) 33 | + ckan(config) 34 | + socrata(config) 35 | + uaz_indicators(config) 36 | + worldbank(config) 37 | + zenodo(config) 38 | //+ test_discoverer(config) 39 | ); 40 | 41 | local files = std.set([data[k].file for k in std.objectFields(data)]); 42 | 43 | { 44 | [file]: 
[file]: std.manifestYamlStream([ 45 | data[k] 46 | for k in std.objectFields(data) 47 | if data[k] != null && data[k].file == file 48 | ]) 49 | for file in files 50 | } 51 | ) 52 | -------------------------------------------------------------------------------- /frontend/src/components/JoinColumnsSelector/FunctionBin.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import {useDrop} from 'react-dnd'; 3 | import * as Icon from 'react-feather'; 4 | 5 | const ItemType = 'badge'; 6 | 7 | const functionBinStyle = (background: string): React.CSSProperties => ({ 8 | border: '1px solid #c0c0c0', 9 | padding: '.5rem', 10 | margin: '0.25rem', 11 | minHeight: '100px', 12 | minWidth: '100px', 13 | verticalAlign: 'middle', 14 | backgroundColor: background, 15 | }); 16 | 17 | interface FunctionBinProps { 18 | fn: string; 19 | label?: string; 20 | } 21 | 22 | const FunctionBin: React.FC<FunctionBinProps> = ({fn, label}) => { 23 | const [{canDrop, isOver}, drop] = useDrop({ 24 | accept: ItemType, 25 | drop: () => ({name: fn}), 26 | collect: monitor => ({ 27 | isOver: monitor.isOver(), 28 | canDrop: monitor.canDrop(), 29 | }), 30 | }); 31 | const isActive = canDrop && isOver; 32 | return ( 33 |
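/* useDrop registers the subtree below as a drop target for 'badge' items; the drop handler returns {name: fn} so the drag source can tell which function bin received it. */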
<div ref={drop}> 34 | <div style={functionBinStyle(isActive ? '#e6ffe6' : '#ffffff')}>
<span> 44 | {isActive ? ( 45 | 'Release!' 46 | ) : ( 47 | <> 48 | {label ? ( 49 | <span>{label}</span> 50 | ) : ( 51 | <> 52 | <b>{fn.toUpperCase()}</b>( 53 | <Icon.Columns size={13} />) 54 | </> 55 | )} 56 | </> 57 | )} 58 | </span> 59 | </div>
60 | </div>
    61 | ); 62 | }; 63 | 64 | export {FunctionBin}; 65 | -------------------------------------------------------------------------------- /tests/data/spatiotemporal.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | import random 3 | import os 4 | 5 | 6 | GRID_CELL_SIZE = 0.001 7 | COLORS = ['red', 'green', 'blue', 'yellow', 'orange'] 8 | 9 | 10 | def main(): 11 | lat = 43.237597 12 | lon = 6.072545 13 | 14 | data_dir = os.path.dirname(__file__) 15 | 16 | with open(os.path.join(data_dir, 'spatiotemporal.csv'), 'w') as f_data: 17 | print('date,latitude,longitude,color', file=f_data) 18 | rand = random.Random(1) 19 | for t in range(20): 20 | time = datetime(2006, 6, 20) 21 | time += timedelta(minutes=t * 30) 22 | for _ in range(10): 23 | print( 24 | '%s,%.3f,%.3f,%s' % ( 25 | time.isoformat(), 26 | lat + GRID_CELL_SIZE * (rand.random() * 6 - 3), 27 | lon + GRID_CELL_SIZE * (rand.random() * 6 - 3), 28 | rand.choice(COLORS), 29 | ), 30 | file=f_data, 31 | ) 32 | 33 | with open(os.path.join(data_dir, 'spatiotemporal_aug.csv'), 'w') as f_data: 34 | print('date,latitude,longitude', file=f_data) 35 | for t in range(3): 36 | time = datetime(2006, 6, 20, 6) 37 | time += timedelta(hours=t) 38 | for x in range(-1, 1): 39 | for y in range(-1, 1): 40 | print( 41 | '%s,%.3f,%.3f' % ( 42 | time.isoformat(), 43 | lat + GRID_CELL_SIZE * y, 44 | lon + GRID_CELL_SIZE * x, 45 | ), 46 | file=f_data, 47 | ) 48 | 49 | 50 | if __name__ == '__main__': 51 | main() 52 | -------------------------------------------------------------------------------- /frontend/src/spatial-utils.ts: -------------------------------------------------------------------------------- 1 | import MapBrowserEvent from 'ol/MapBrowserEvent'; 2 | import Geometry from 'ol/geom/Geometry'; 3 | import {FeatureLike} from 'ol/Feature'; 4 | import {Map} from 'ol'; 5 | import {Extent} from 'ol/extent'; 6 | 7 | // 8 | // Following types are a temporary workaround to a bug in typings from the 9 | // OpenLayers library (package @types/ol) This may be removed after upgrading 10 | // this library to a newer version 11 | // 12 | export interface MyMapBrowserEvent extends MapBrowserEvent { 13 | pointerEvent: PointerEvent; 14 | } 15 | 16 | interface MyGeometry extends Geometry { 17 | getCoordinates(): number[][][]; 18 | } 19 | 20 | export function transformCoordinates(feature: FeatureLike) { 21 | const geometry = feature.getGeometry() as MyGeometry; 22 | const transformedGeometry = geometry 23 | .clone() 24 | .transform('EPSG:3857', 'EPSG:4326') as MyGeometry; 25 | const coordinates = transformedGeometry.getCoordinates()[0]; 26 | return { 27 | topLeftLat: coordinates[0][1], 28 | topLeftLon: coordinates[0][0], 29 | bottomRightLat: coordinates[2][1], 30 | bottomRightLon: coordinates[2][0], 31 | }; 32 | } 33 | 34 | export function centralizeMapToExtent(map: Map, extent: Extent) { 35 | map.getView().fit(extent); 36 | map.updateSize(); 37 | } 38 | 39 | export function centralizeMapToFeature(map: Map, feature: FeatureLike) { 40 | const extent = feature.getGeometry()?.getExtent(); 41 | if (extent) { 42 | centralizeMapToExtent(map, extent); 43 | } 44 | } 45 | 46 | export function wrapLongitude(x: number) { 47 | if (-180 <= x && x <= 180) { 48 | return x; 49 | } 50 | console.log(`> ${x} ...`); 51 | x += 180; 52 | x = x % 360; 53 | x = (x + 360) % 360; // second pass for negative remainders 54 | x -= 180; 55 | console.log(`<< ${x}`); 56 | return x; 57 | } 58 | 
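A note on the double-pass modulo in wrapLongitude above: JavaScript's `%` keeps the sign of the dividend, so a single remainder would leave far-out-of-range negative values un-normalized. A minimal sketch of the edge cases (the inputs are arbitrary illustrative values, not taken from the codebase):

```ts
// Sanity checks for wrapLongitude; inputs are illustrative.
import {wrapLongitude} from './spatial-utils';

console.assert(wrapLongitude(170) === 170);  // already in [-180, 180]: returned as-is
console.assert(wrapLongitude(190) === -170); // wraps across the antimeridian
console.assert(wrapLongitude(360) === 0);    // a full turn comes back to 0
console.assert(wrapLongitude(-541) === 179); // (-541 + 180) % 360 is negative; the second % fixes it
```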
-------------------------------------------------------------------------------- /frontend/src/components/visus/Loading/Spinner.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import styled, {keyframes} from 'styled-components'; 3 | 4 | export const keyFrameInfiniteSpin = keyframes` 5 | from {transform: rotate(0deg)} 6 | to {transform: rotate(360deg)} 7 | `; 8 | 9 | export const SpinningSvg = styled.svg` 10 | animation-name: ${keyFrameInfiniteSpin}; 11 | transition-property: transform; 12 | animation-iteration-count: infinite; 13 | animation-timing-function: linear; 14 | `; 15 | 16 | interface SpinnerProps { 17 | color?: string; 18 | speed?: string; 19 | gap?: number; 20 | thickness?: number; 21 | size?: string; 22 | } 23 | 24 | class Spinner extends React.PureComponent<SpinnerProps> { 25 | static defaultProps = { 26 | color: 'rgba(0,0,0,0.4)', 27 | gap: 4, 28 | thickness: 4, 29 | size: '1.0em', 30 | }; 31 | 32 | speedSwitch(speed?: string) { 33 | if (speed === 'fast') { 34 | return 600; 35 | } 36 | if (speed === 'slow') { 37 | return 900; 38 | } 39 | return 750; 40 | } 41 | 42 | render() { 43 | return ( 44 | <SpinningSvg height={this.props.size} width={this.props.size} viewBox="0 0 32 32" role="img" style={{animationDuration: `${this.speedSwitch(this.props.speed)}ms`}} > 54 | <circle role="presentation" cx={16} cy={16} r={14 - this.props.thickness! / 2} stroke={this.props.color} fill="none" strokeWidth={this.props.thickness} strokeDasharray={Math.PI * 2 * (11 - this.props.gap!)} strokeLinecap="round" /> 65 | </SpinningSvg> 66 | ); 67 | } 68 | } 69 | 70 | export {Spinner}; 71 | -------------------------------------------------------------------------------- /lib_core/datamart_core/types.py: -------------------------------------------------------------------------------- 1 | # Column types 2 | 3 | MISSING_DATA = 'https://metadata.datadrivendiscovery.org/types/MissingData' 4 | """No data (whole column is missing)""" 5 | 6 | INTEGER = 'http://schema.org/Integer' 7 | """Integer (numbers without a decimal point)""" 8 | 9 | FLOAT = 'http://schema.org/Float' 10 | """Floating-point numbers""" 11 | 12 | TEXT = 'http://schema.org/Text' 13 | """Text, better represented as strings""" 14 | 15 | BOOLEAN = 'http://schema.org/Boolean' 16 | """Booleans, e.g. only the two values \"true\" and \"false\"""" 17 | 18 | LATITUDE = 'http://schema.org/latitude' 19 | """Numerical values representing latitude coordinates""" 20 | 21 | LONGITUDE = 'http://schema.org/longitude' 22 | """Numerical values representing longitude coordinates""" 23 | 24 | DATE_TIME = 'http://schema.org/DateTime' 25 | """A specific instant in time (not partial ones such as "July 4" or "12am")""" 26 | 27 | ADDRESS = 'http://schema.org/address' 28 | """The street address of a location""" 29 | 30 | ADMIN = 'http://schema.org/AdministrativeArea' 31 | """A named administrative area, such as a country, state, or city""" 32 | 33 | URL = 'http://schema.org/URL' 34 | """A URL""" 35 | 36 | FILE_PATH = 'https://metadata.datadrivendiscovery.org/types/FileName' 37 | """A filename""" 38 | 39 | ID = 'http://schema.org/identifier' 40 | """An identifier""" 41 | 42 | CATEGORICAL = 'http://schema.org/Enumeration' 43 | """Categorical values, i.e.
drawn from a limited number of options""" 44 | 45 | GEO_POINT = 'http://schema.org/GeoCoordinates' 46 | """A geographic location (latitude+longitude coordinates)""" 47 | 48 | GEO_POLYGON = 'http://schema.org/GeoShape' 49 | """A geographic shape described by its coordinates""" 50 | 51 | 52 | # Dataset types 53 | 54 | DATASET_NUMERICAL = 'numerical' 55 | DATASET_CATEGORICAL = 'categorical' 56 | DATASET_SPATIAL = 'spatial' 57 | DATASET_TEMPORAL = 'temporal' 58 | -------------------------------------------------------------------------------- /lib_profiler/datamart_profiler/types.py: -------------------------------------------------------------------------------- 1 | # Column types 2 | 3 | MISSING_DATA = 'https://metadata.datadrivendiscovery.org/types/MissingData' 4 | """No data (whole column is missing)""" 5 | 6 | INTEGER = 'http://schema.org/Integer' 7 | """Integer (numbers without a decimal point)""" 8 | 9 | FLOAT = 'http://schema.org/Float' 10 | """Floating-point numbers""" 11 | 12 | TEXT = 'http://schema.org/Text' 13 | """Text, better represented as strings""" 14 | 15 | BOOLEAN = 'http://schema.org/Boolean' 16 | """Booleans, e.g. only the two values \"true\" and \"false\"""" 17 | 18 | LATITUDE = 'http://schema.org/latitude' 19 | """Numerical values representing latitude coordinates""" 20 | 21 | LONGITUDE = 'http://schema.org/longitude' 22 | """Numerical values representing longitude coordinates""" 23 | 24 | DATE_TIME = 'http://schema.org/DateTime' 25 | """A specific instant in time (not partial ones such as "July 4" or "12am")""" 26 | 27 | ADDRESS = 'http://schema.org/address' 28 | """The street address of a location""" 29 | 30 | ADMIN = 'http://schema.org/AdministrativeArea' 31 | """A named administrative area, such as a country, state, or city""" 32 | 33 | URL = 'http://schema.org/URL' 34 | """A URL""" 35 | 36 | FILE_PATH = 'https://metadata.datadrivendiscovery.org/types/FileName' 37 | """A filename""" 38 | 39 | ID = 'http://schema.org/identifier' 40 | """An identifier""" 41 | 42 | CATEGORICAL = 'http://schema.org/Enumeration' 43 | """Categorical values, i.e. drawn from a limited number of options""" 44 | 45 | GEO_POINT = 'http://schema.org/GeoCoordinates' 46 | """A geographic location (latitude+longitude coordinates)""" 47 | 48 | GEO_POLYGON = 'http://schema.org/GeoShape' 49 | """A geographic shape described by its coordinates""" 50 | 51 | 52 | # Dataset types 53 | 54 | DATASET_NUMERICAL = 'numerical' 55 | DATASET_CATEGORICAL = 'categorical' 56 | DATASET_SPATIAL = 'spatial' 57 | DATASET_TEMPORAL = 'temporal' 58 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. 
For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | import os 8 | import subprocess 9 | 10 | 11 | # -- Project information ----------------------------------------------------- 12 | 13 | project = 'Auctus' 14 | copyright = '2019, New York University' 15 | author = 'Remi Rampin' 16 | 17 | # The full version, including alpha/beta/rc tags 18 | release = subprocess.check_output(['git', 'describe'], encoding='ascii') 19 | os.environ['DATAMART_VERSION'] = release 20 | 21 | 22 | # -- General configuration --------------------------------------------------- 23 | 24 | # Add any Sphinx extension module names here, as strings. They can be 25 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 26 | # ones. 27 | extensions = [ 28 | 'sphinx.ext.autodoc', 29 | ] 30 | 31 | # Add any paths that contain templates here, relative to this directory. 32 | templates_path = ['_templates'] 33 | 34 | # List of patterns, relative to source directory, that match files and 35 | # directories to ignore when looking for source files. 36 | # This pattern also affects html_static_path and html_extra_path. 37 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 38 | 39 | 40 | # -- Options for HTML output ------------------------------------------------- 41 | 42 | # The theme to use for HTML and HTML Help pages. See the documentation for 43 | # a list of builtin themes. 44 | # 45 | html_theme = 'sphinx_rtd_theme' 46 | 47 | # Add any paths that contain custom static files (such as style sheets) here, 48 | # relative to this directory. They are copied after the builtin static files, 49 | # so a file named "default.css" will overwrite the builtin "default.css". 50 | html_static_path = ['_static'] 51 | -------------------------------------------------------------------------------- /profiler/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | 5 | os.chdir(os.path.abspath(os.path.dirname(__file__))) 6 | 7 | 8 | req = [ 9 | 'aio-pika', 10 | 'elasticsearch~=7.0', 11 | 'lazo-index-service==0.7.0', 12 | 'opentelemetry-api', 13 | 'opentelemetry-distro', 14 | 'opentelemetry-instrumentation-elasticsearch', 15 | 'opentelemetry-instrumentation-grpc', 16 | 'prometheus_client', 17 | 'xlrd', 18 | 'defusedxml', 19 | 'datamart-core', 20 | 'datamart-materialize', 21 | 'datamart-profiler', 22 | ] 23 | setup(name='datamart-profiler-service', 24 | version='0.0', 25 | py_modules=['profiler'], 26 | entry_points={ 27 | 'console_scripts': [ 28 | 'profiler = profiler:main']}, 29 | install_requires=req, 30 | description="Data profiling service of Auctus", 31 | author="Remi Rampin", 32 | author_email='remi.rampin@nyu.edu', 33 | maintainer="Remi Rampin", 34 | maintainer_email='remi.rampin@nyu.edu', 35 | url='https://gitlab.com/ViDA-NYU/auctus/auctus', 36 | project_urls={ 37 | 'Homepage': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 38 | 'Source': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 39 | 'Tracker': 'https://gitlab.com/ViDA-NYU/auctus/auctus/-/issues', 40 | }, 41 | long_description="Data profiling service of Auctus", 42 | license='Apache-2.0', 43 | keywords=['auctus', 'datamart'], 44 | classifiers=[ 45 | 'Development Status :: 3 - Alpha', 46 | 'Intended Audience :: Science/Research', 47 | 'License :: OSI Approved :: Apache Software License', 48 | 'Natural Language :: English', 49 | 'Operating System :: OS Independent', 50 | 'Programming Language :: Python :: 3 :: 
Only', 51 | 'Topic :: Scientific/Engineering :: Information Analysis']) 52 | -------------------------------------------------------------------------------- /coordinator/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | 5 | os.chdir(os.path.abspath(os.path.dirname(__file__))) 6 | 7 | 8 | req = [ 9 | 'aio-pika', 10 | 'elasticsearch~=7.0', 11 | 'prometheus_client', 12 | 'PyYaml', 13 | 'jinja2', 14 | 'tornado>=5.0', 15 | 'datamart-core', 16 | ] 17 | setup(name='datamart-coordinator-service', 18 | version='0.0', 19 | packages=['coordinator'], 20 | package_data={'coordinator': [ 21 | 'static/css/*.css', 'static/css/*.css.map', 22 | 'static/js/*.js', 'static/js/*.js.map', 23 | 'templates/*.html', 24 | 'elasticsearch.yml', 25 | ]}, 26 | entry_points={ 27 | 'console_scripts': [ 28 | 'coordinator = coordinator.web:main']}, 29 | install_requires=req, 30 | description="Coordinator service for Auctus", 31 | author="Remi Rampin", 32 | author_email='remi.rampin@nyu.edu', 33 | maintainer="Remi Rampin", 34 | maintainer_email='remi.rampin@nyu.edu', 35 | url='https://gitlab.com/ViDA-NYU/auctus/auctus', 36 | project_urls={ 37 | 'Homepage': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 38 | 'Source': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 39 | 'Tracker': 'https://gitlab.com/ViDA-NYU/auctus/auctus/-/issues', 40 | }, 41 | long_description="Coordinator service for Auctus", 42 | license='Apache-2.0', 43 | keywords=['auctus', 'datamart'], 44 | classifiers=[ 45 | 'Development Status :: 4 - Beta', 46 | 'Intended Audience :: Science/Research', 47 | 'License :: OSI Approved :: Apache Software License', 48 | 'Operating System :: Unix', 49 | 'Programming Language :: JavaScript', 50 | 'Programming Language :: Python :: 3 :: Only', 51 | 'Topic :: Scientific/Engineering :: Information Analysis']) 52 | -------------------------------------------------------------------------------- /lib_materialize/datamart_materialize/types.py: -------------------------------------------------------------------------------- 1 | # Column types 2 | 3 | MISSING_DATA = 'https://metadata.datadrivendiscovery.org/types/MissingData' 4 | """No data (whole column is missing)""" 5 | 6 | INTEGER = 'http://schema.org/Integer' 7 | """Integer (numbers without a decimal point)""" 8 | 9 | FLOAT = 'http://schema.org/Float' 10 | """Floating-point numbers""" 11 | 12 | TEXT = 'http://schema.org/Text' 13 | """Text, better represented as strings""" 14 | 15 | BOOLEAN = 'http://schema.org/Boolean' 16 | """Booleans, e.g. only the two values \"true\" and \"false\"""" 17 | 18 | LATITUDE = 'http://schema.org/latitude' 19 | """Numerical values representing latitude coordinates""" 20 | 21 | LONGITUDE = 'http://schema.org/longitude' 22 | """Numerical values representing longitude coordinates""" 23 | 24 | DATE_TIME = 'http://schema.org/DateTime' 25 | """A specific instant in time (not partial ones such as "July 4" or "12am")""" 26 | 27 | ADDRESS = 'http://schema.org/address' 28 | """The street address of a location""" 29 | 30 | ADMIN = 'http://schema.org/AdministrativeArea' 31 | """A named administrative area, such as a country, state, or city""" 32 | 33 | URL = 'http://schema.org/URL' 34 | """A URL""" 35 | 36 | FILE_PATH = 'https://metadata.datadrivendiscovery.org/types/FileName' 37 | """A filename""" 38 | 39 | ID = 'http://schema.org/identifier' 40 | """An identifier""" 41 | 42 | CATEGORICAL = 'http://schema.org/Enumeration' 43 | """Categorical values, i.e. 
drawn from a limited number of options""" 44 | 45 | GEO_POINT = 'http://schema.org/GeoCoordinates' 46 | """A geographic location (latitude+longitude coordinates)""" 47 | 48 | GEO_POLYGON = 'http://schema.org/GeoShape' 49 | """A geographic shape described by its coordinates""" 50 | 51 | 52 | # Dataset types 53 | 54 | DATASET_NUMERICAL = 'numerical' 55 | DATASET_CATEGORICAL = 'categorical' 56 | DATASET_SPATIAL = 'spatial' 57 | DATASET_TEMPORAL = 'temporal' 58 | -------------------------------------------------------------------------------- /lib_materialize/datamart_materialize/pivot.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | from datamart_materialize.utils import SimpleConverter 4 | 5 | 6 | VALUE_COLUMN_LABEL = 'value' 7 | 8 | 9 | def pivot_table( 10 | source_filename, dest_fileobj, except_columns, date_label='date', 11 | ): 12 | with open(source_filename, 'r') as src_fp: 13 | src = iter(csv.reader(src_fp)) 14 | dst = csv.writer(dest_fileobj) 15 | 16 | # Read original columns, some are carried over 17 | try: 18 | orig_columns = next(src) 19 | except StopIteration: 20 | raise ValueError("Empty table") 21 | carried_columns = [orig_columns[i] for i in except_columns] 22 | 23 | # Generate new header 24 | dst.writerow(carried_columns + [date_label, VALUE_COLUMN_LABEL]) 25 | 26 | # Indexes of date columns 27 | date_indexes = [ 28 | i for i in range(len(orig_columns)) 29 | if i not in except_columns 30 | ] 31 | dates = [ 32 | name for i, name in enumerate(orig_columns) 33 | if i not in except_columns 34 | ] 35 | 36 | for row in src: 37 | carried_values = [row[i] for i in except_columns] 38 | for date, date_idx in zip(dates, date_indexes): 39 | dst.writerow(carried_values + [date, row[date_idx]]) 40 | 41 | 42 | class PivotConverter(SimpleConverter): 43 | """Adapter pivoting a table. 
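The table is unpivoted: the columns listed in except_columns are carried over unchanged, and every other (header, cell) pair becomes one output row under the date_label and value columns; e.g. a row ('NY', '10', '12') with headers ('city', '2019', '2020') yields ('NY', '2019', '10') and ('NY', '2020', '12').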
44 | """ 45 | def __init__(self, writer, *, except_columns, date_label='date'): 46 | super(PivotConverter, self).__init__(writer) 47 | self.except_columns = except_columns 48 | self.date_label = date_label 49 | 50 | def transform(self, source_filename, dest_fileobj): 51 | pivot_table( 52 | source_filename, 53 | dest_fileobj, 54 | self.except_columns, 55 | self.date_label, 56 | ) 57 | -------------------------------------------------------------------------------- /apiserver/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | 5 | os.chdir(os.path.abspath(os.path.dirname(__file__))) 6 | 7 | 8 | req = [ 9 | 'advocate>=1.0,<2', 10 | 'aio-pika', 11 | 'elasticsearch~=7.0', 12 | 'redis~=3.4', 13 | 'lazo-index-service==0.7.0', 14 | 'opentelemetry-distro', 15 | 'opentelemetry-instrumentation-elasticsearch', 16 | 'opentelemetry-instrumentation-grpc', 17 | 'opentelemetry-instrumentation-tornado', 18 | 'prometheus_client', 19 | 'tornado>=5.0', 20 | 'datamart-augmentation', 21 | 'datamart-core', 22 | 'datamart-materialize', 23 | 'datamart-profiler', 24 | ] 25 | setup(name='datamart-api-service', 26 | version='0.0', 27 | packages=['apiserver'], 28 | entry_points={ 29 | 'console_scripts': [ 30 | 'datamart-apiserver = apiserver.main:main']}, 31 | install_requires=req, 32 | description="API service of Auctus", 33 | author="Remi Rampin", 34 | author_email='remi.rampin@nyu.edu', 35 | maintainer="Remi Rampin", 36 | maintainer_email='remi.rampin@nyu.edu', 37 | url='https://gitlab.com/ViDA-NYU/auctus/auctus', 38 | project_urls={ 39 | 'Homepage': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 40 | 'Source': 'https://gitlab.com/ViDA-NYU/auctus/auctus', 41 | 'Tracker': 'https://gitlab.com/ViDA-NYU/auctus/auctus/-/issues', 42 | }, 43 | long_description="API service of Auctus", 44 | license='Apache-2.0', 45 | keywords=['auctus', 'datamart'], 46 | classifiers=[ 47 | 'Development Status :: 4 - Beta', 48 | 'Intended Audience :: Science/Research', 49 | 'License :: OSI Approved :: Apache Software License', 50 | 'Operating System :: Unix', 51 | 'Programming Language :: Python :: 3 :: Only', 52 | 'Topic :: Scientific/Engineering :: Information Analysis']) 53 | -------------------------------------------------------------------------------- /frontend/src/components/SearchBar/SearchBar.tsx: -------------------------------------------------------------------------------- 1 | import './SearchBar.css'; 2 | import React from 'react'; 3 | import * as Icon from 'react-feather'; 4 | 5 | interface SearchBarProps { 6 | active: boolean; 7 | placeholder?: string; 8 | value: string; 9 | onQueryChange: (query: string) => void; 10 | onSubmitQuery: () => void; 11 | } 12 | 13 | class SearchBar extends React.PureComponent { 14 | constructor(props: SearchBarProps) { 15 | super(props); 16 | this.handleChange = this.handleChange.bind(this); 17 | this.handleSubmit = this.handleSubmit.bind(this); 18 | } 19 | 20 | isActive() { 21 | return this.props.active; 22 | } 23 | 24 | handleChange(event: React.ChangeEvent) { 25 | const query = event.target.value; 26 | this.props.onQueryChange(query); 27 | } 28 | 29 | handleSubmit(e: React.FormEvent) { 30 | e.preventDefault(); 31 | this.props.onSubmitQuery(); 32 | } 33 | 34 | render() { 35 | return ( 36 |
<form onSubmit={this.handleSubmit}> 37 | <div className="input-group">
38 | <input type="text" className="form-control" placeholder={this.props.placeholder} value={this.props.value} onChange={this.handleChange} /> 46 | <div className="input-group-append">
<button 48 | type="submit" className="btn btn-primary" onClick={() => this.props.onSubmitQuery()} 49 | > 50 | <Icon.Search className="feather" /> 55 | </button> 56 | </div> 57 | </div>
58 | </form>
    59 |
    60 | ); 61 | } 62 | } 63 | 64 | export {SearchBar}; 65 | -------------------------------------------------------------------------------- /frontend/src/components/Chip/Chip.css: -------------------------------------------------------------------------------- 1 | .chip { 2 | /* color: rgba(0, 0, 0, 0.87); */ 3 | border: none; 4 | cursor: default; 5 | height: 32px; 6 | display: inline-flex; 7 | outline: 0; 8 | padding: 0 0.5rem; 9 | font-size: 0.8125rem; 10 | box-sizing: border-box; 11 | transition: background-color 300ms cubic-bezier(0.4, 0, 0.2, 1) 0ms, 12 | box-shadow 300ms cubic-bezier(0.4, 0, 0.2, 1) 0ms; 13 | align-items: center; 14 | /* font-family: "Roboto", "Helvetica", "Arial", sans-serif; */ 15 | font-family: "Oswald"; 16 | white-space: nowrap; 17 | border-radius: 16px; 18 | vertical-align: middle; 19 | justify-content: center; 20 | text-decoration: none; 21 | background-color: #e0e0e0; 22 | } 23 | 24 | .chip-clickable { 25 | cursor: pointer; 26 | user-select: none; 27 | -webkit-tap-highlight-color: transparent; 28 | } 29 | 30 | .chip-clickable:hover { 31 | background-color: #f0f0f0; 32 | } 33 | 34 | .chip-btn-close { 35 | margin-left: .25rem!important; 36 | margin-bottom: 0.1rem!important; 37 | } 38 | 39 | .chip-icon { 40 | margin-right: .3rem!important; 41 | margin-bottom: 0.1rem!important; 42 | } 43 | 44 | .chip .chip-btn-close { 45 | color: rgb(0, 0, 0, 0.5); 46 | cursor: pointer; 47 | user-select: none; 48 | -webkit-tap-highlight-color: transparent; 49 | } 50 | .chip .chip-btn-close:hover { 51 | color: rgb(0, 0, 0, 1.0); 52 | } 53 | 54 | .chip-outline { 55 | border: 1px solid #ced4da; 56 | background-color: transparent; 57 | } 58 | .chip-outline .chip-icon { 59 | color: var(--primary); 60 | } 61 | 62 | 63 | .chip-primary { 64 | border-color: var(--primary); 65 | } 66 | .chip-primary .chip-icon { 67 | color: var(--primary); 68 | } 69 | .chip-primary .chip-btn-close { 70 | color: rgb(99, 80, 139, 0.7); 71 | } 72 | .chip-primary .chip-btn-close:hover { 73 | color: rgb(99, 80, 139); 74 | } 75 | 76 | 77 | .chip-group > * { 78 | display: inline-flex; 79 | margin-right: 0.25rem; 80 | } 81 | 82 | .chip-group > *:last-child { 83 | margin-left: 0px; 84 | } 85 | -------------------------------------------------------------------------------- /frontend/src/App.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import {CenteredHorizontalLogo} from './components/Logo/Logo'; 3 | import {MainMenu} from './components/MainMenu/MainMenu'; 4 | import {BrowserRouter as Router, Link, Switch, Route} from 'react-router-dom'; 5 | import {Upload} from './components/Upload/Upload'; 6 | import {Statistics} from './components/Statistics/Statistics'; 7 | import {SearchApp} from './components/SearchApp/SearchApp'; 8 | 9 | class App extends React.Component { 10 | render() { 11 | return ( 12 |
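/* The upload and statistics pages render the centered logo and main menu above their content; any other path falls through to the search application. */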
<div> 13 | <Router> 14 | <Switch> 15 | <Route 16 | path="/upload" 17 | render={() => ( 18 | <div>
19 | <Link to="/"> 20 | <CenteredHorizontalLogo /> 21 | </Link> 22 | <MainMenu /> 23 | <Upload /> 24 | </div>
25 | )} 26 | /> 27 | <Route 28 | path="/statistics" 29 | render={() => ( 30 | <div>
31 | <Link to="/"> 32 | <CenteredHorizontalLogo /> 33 | </Link> 34 | <MainMenu /> 35 | <Statistics /> 36 | </div>
37 | )} 38 | /> 39 | <Route 40 | render={() => ( 42 | <> 43 | <MainMenu /> 44 | <SearchApp /> 49 | </> 50 | )} 51 | /> 52 | </Switch>
53 | </Router>
54 | </div>
55 | ); 56 | } 57 | } 58 | 59 | export {App}; 60 | -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | Scripts 2 | ======= 3 | 4 | This folder contains scripts that make it easy to perform certain tasks. Those scripts are meant to be run inside the Auctus environment; the ones prefixed with `docker_` will run the corresponding script inside an Auctus container (provided the images have been built using the default docker-compose names). 5 | 6 | * setup.sh: Run this once to set up your local checkout. This sets up the permissions on the volumes for docker-compose. 7 | * docker_import_snapshot.sh: This downloads a dump of Elasticsearch from https://auctus.vida-nyu.org/snapshot/ and imports it using import_all.py 8 | * docker_import_all.sh / import_all.py: This can be used to load a dump of Elasticsearch as JSON files. Useful to restore a backup 9 | * import.py: Import a single dataset from a JSON file 10 | * reprocess_all.py: This loads a dump of Elasticsearch as JSON files, but reprocesses the datasets 11 | * freshen_old_index.py: This reprocesses datasets that were profiled by old versions 12 | * docker_export_all.sh / export_all.py: This can be used to do a backup of the index. It creates a dump of Elasticsearch as JSON files 13 | * docker_save_uploads.sh: This can be used to save the datasets that have been manually uploaded into Auctus (the data itself, not the indexed JSON documents) 14 | * delete_dataset.py: Removes a single dataset from the index 15 | * list_big_datasets.py: Lists the big datasets that have been indexed (by looking for the 'size' property above 50 MB) 16 | * list_sources.py: This lists the number of datasets in the index per source (this is now shown on the index page of the coordinator as well) 17 | * docker_purge_source.sh / purge_source.py: This removes all datasets from a given source 18 | * clear_caches.py / docker_clear_caches.sh: This safely clears the caches 19 | * upload_dataset.sh: This profiles and adds a dataset to the index 20 | * report-uploads.sh: Alerts when datasets are uploaded to the system 21 | * dataset_to_sup_index.py: This creates the supplementary column indices after 5507ab47 22 | -------------------------------------------------------------------------------- /docker/grafana.dockerfile: -------------------------------------------------------------------------------- 1 | ARG GRAFANA_VERSION="latest" 2 | 3 | FROM python:3.8 AS tini 4 | 5 | ENV TINI_VERSION v0.18.0 6 | RUN curl -Lo /tini https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini && \ 7 | chmod +x /tini 8 | 9 | FROM grafana/grafana:${GRAFANA_VERSION} 10 | 11 | USER root 12 | 13 | ARG GF_INSTALL_IMAGE_RENDERER_PLUGIN="false" 14 | 15 | ENV GF_PATHS_PLUGINS="/var/lib/grafana-plugins" 16 | 17 | RUN mkdir -p "$GF_PATHS_PLUGINS" && \ 18 | chown -R grafana:root "$GF_PATHS_PLUGINS" 19 | 20 | RUN if [ $GF_INSTALL_IMAGE_RENDERER_PLUGIN != "false" ]; then \ 21 | echo "http://dl-cdn.alpinelinux.org/alpine/edge/community" >> /etc/apk/repositories && \ 22 | echo "http://dl-cdn.alpinelinux.org/alpine/edge/main" >> /etc/apk/repositories && \ 23 | echo "http://dl-cdn.alpinelinux.org/alpine/edge/testing" >> /etc/apk/repositories && \ 24 | apk --no-cache upgrade && \ 25 | apk add --no-cache udev ttf-opensans chromium && \ 26 | rm -rf /tmp/* && \ 27 | rm -rf /usr/share/grafana/tools/phantomjs; \ 28 | fi 29 | 30 | USER grafana 31 | 32 |
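# Point the image renderer at the system Chromium installed above; both spellings of the variable are set because plugin releases have used different names.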
ENV GF_RENDERER_PLUGIN_CHROME_BIN="/usr/bin/chromium-browser" 33 | ENV GF_PLUGIN_RENDERING_CHROME_BIN="/usr/bin/chromium-browser" 34 | 35 | RUN if [ $GF_INSTALL_IMAGE_RENDERER_PLUGIN != "false" ]; then \ 36 | grafana-cli \ 37 | --pluginsDir "$GF_PATHS_PLUGINS" \ 38 | --pluginUrl https://github.com/grafana/grafana-image-renderer/releases/download/$GF_INSTALL_IMAGE_RENDERER_PLUGIN/plugin-linux-x64-glibc-no-chromium.zip \ 39 | plugins install grafana-image-renderer; \ 40 | fi 41 | 42 | ARG GF_INSTALL_PLUGINS="" 43 | 44 | RUN if [ ! -z "${GF_INSTALL_PLUGINS}" ]; then \ 45 | OLDIFS=$IFS; \ 46 | IFS=','; \ 47 | for plugin in ${GF_INSTALL_PLUGINS}; do \ 48 | IFS=$OLDIFS; \ 49 | grafana-cli --pluginsDir "$GF_PATHS_PLUGINS" plugins install ${plugin}; \ 50 | done; \ 51 | fi 52 | 53 | # Use tini so Chrome processes get reaped 54 | # https://github.com/grafana/grafana-image-renderer/issues/179 55 | COPY --from=tini /tini /tini 56 | ENTRYPOINT ["/tini", "--", "/run.sh"] 57 | -------------------------------------------------------------------------------- /scripts/reprocess_all.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """This script imports an exported index, but reprocesses the datasets. 4 | 5 | The name, description, and date are loaded from the old JSON, but the rest will 6 | be reprofiled instead. 7 | """ 8 | 9 | import aio_pika 10 | import asyncio 11 | import json 12 | import logging 13 | import os 14 | import sys 15 | 16 | from datamart_core.common import json2msg, decode_dataset_id 17 | 18 | 19 | async def import_all(folder): 20 | amqp_conn = await aio_pika.connect_robust( 21 | host=os.environ['AMQP_HOST'], 22 | port=int(os.environ['AMQP_PORT']), 23 | login=os.environ['AMQP_USER'], 24 | password=os.environ['AMQP_PASSWORD'], 25 | ) 26 | amqp_chan = await amqp_conn.channel() 27 | amqp_profile_exchange = await amqp_chan.declare_exchange( 28 | 'profile', 29 | aio_pika.ExchangeType.FANOUT, 30 | ) 31 | 32 | for name in os.listdir(folder): 33 | if not name.startswith('lazo.'): 34 | dataset_id = decode_dataset_id(name) 35 | path = os.path.join(folder, name) 36 | with open(path, 'r') as fp: 37 | obj = json.load(fp) 38 | metadata = dict(name=obj['name'], 39 | materialize=obj['materialize'], 40 | source=obj.get('source', 'unknown')) 41 | if obj.get('description'): 42 | metadata['description'] = obj['description'] 43 | if obj.get('date'): 44 | metadata['date'] = obj['date'] 45 | if obj.get('manual_annotations'): 46 | metadata['manual_annotations'] = obj['manual_annotations'] 47 | await amqp_profile_exchange.publish( 48 | json2msg(dict(id=dataset_id, metadata=metadata)), 49 | '', 50 | ) 51 | print('.', end='', flush=True) 52 | 53 | 54 | if __name__ == '__main__': 55 | logging.basicConfig(level=logging.INFO) 56 | 57 | loop = asyncio.get_event_loop() 58 | loop.run_until_complete(loop.create_task( 59 | import_all(sys.argv[1]) 60 | )) 61 | -------------------------------------------------------------------------------- /frontend/src/components/MainMenu/MainMenu.tsx: -------------------------------------------------------------------------------- 1 | import * as React from 'react'; 2 | import * as Icon from 'react-feather'; 3 | import './MainMenu.css'; 4 | import {DropdownMenu} from '../ui/DropdownMenu/DropdownMenu'; 5 | import {Link as RouterLink} from 'react-router-dom'; 6 | 7 | function Link(props: {path: string; label: string; icon: Icon.Icon}) { 8 | const content = ( 9 | 10 | {props.label} 11 | 12 | ); 13 | return ( 14 |
    15 | {props.path.startsWith('http:') || props.path.startsWith('https:') ? ( 16 | {content} 17 | ) : ( 18 | {content} 19 | )} 20 |
    21 | ); 22 | } 23 | 24 | class MainMenu extends React.PureComponent { 25 | render() { 26 | return ( 27 | 28 | {({active, onClick}) => ( 29 | <> 30 |
    31 |
    32 | 33 | 34 | 35 |
    36 | {active && ( 37 |
    38 | 43 | 48 | 53 |
    54 | )} 55 |
    56 | 57 | )} 58 |
    59 | ); 60 | } 61 | } 62 | 63 | export {MainMenu}; 64 | -------------------------------------------------------------------------------- /docker/install_deps.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """Install dependencies from poetry.lock 4 | 5 | This script is used as part of the Docker build to install all dependencies as 6 | an initial step before building the images. This makes caching efficient, 7 | allowing for faster builds that work offline. 8 | 9 | It means all images have all dependencies installed, but thanks to 10 | de-duplication, this generally uses less space if all images exist on the same 11 | machine. 12 | """ 13 | 14 | import subprocess 15 | import sys 16 | import toml 17 | 18 | 19 | def main(args): 20 | devel = False 21 | if args[0] == '--dev': 22 | devel = True 23 | args = args[1:] 24 | 25 | with open(args[0]) as fp: 26 | lockfile = toml.load(fp) 27 | 28 | packages = [] 29 | 30 | for package in lockfile['package']: 31 | if package['category'] == 'dev': 32 | if not devel: 33 | continue 34 | elif package['category'] != 'main': 35 | raise ValueError( 36 | "Unknown package category %s" % package['category'] 37 | ) 38 | 39 | if 'source' in package: 40 | if package['source']['type'] == 'git': 41 | packages.append('git+%s@%s' % ( 42 | package['source']['url'], 43 | package['source']['reference'], 44 | )) 45 | elif package['source']['type'] == 'url': 46 | packages.append(package['source']['url']) 47 | elif package['source']['type'] != 'directory': 48 | raise ValueError( 49 | "Unknown package source %s" % package['source']['type'] 50 | ) 51 | # Ignore 'directory' dependencies 52 | else: 53 | packages.append('%s==%s' % (package['name'], package['version'])) 54 | 55 | subprocess.check_call( 56 | [ 57 | 'pip3', 58 | '--disable-pip-version-check', 59 | '--no-cache-dir', 60 | 'install', 61 | ] + packages, 62 | ) 63 | 64 | 65 | if __name__ == '__main__': 66 | main(sys.argv[1:]) 67 | -------------------------------------------------------------------------------- /apiserver/apiserver/graceful_shutdown.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import signal 4 | import tornado.ioloop 5 | import tornado.web 6 | 7 | from datamart_core.common import log_future 8 | 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class GracefulApplication(tornado.web.Application): 14 | """Application that exits on SIGTERM once no GracefulHandlers are running. 
 15 | """ 16 | def __init__(self, *args, **kwargs): 17 | super(GracefulApplication, self).__init__(*args, **kwargs) 18 | 19 | self.is_closing = False 20 | self.nb_requests = 0 21 | self.close_condition = asyncio.Condition() 22 | 23 | signal.signal(signal.SIGTERM, self.signal_handler) 24 | 25 | def signal_handler(self, signum, frame): 26 | logger.warning("Got signal %s, exiting...", signum) 27 | self.is_closing = True 28 | tornado.ioloop.IOLoop.current().add_callback_from_signal(self.try_exit) 29 | 30 | def try_exit(self): 31 | async def do_exit(): 32 | async with self.close_condition: 33 | while self.nb_requests > 0: 34 | logger.info("%d requests in progress, waiting...", 35 | self.nb_requests) 36 | await self.close_condition.wait() 37 | logger.warning("Closing gracefully") 38 | tornado.ioloop.IOLoop.current().stop() 39 | 40 | log_future(asyncio.get_event_loop().create_task(do_exit()), logger) 41 | 42 | 43 | class GracefulHandler(tornado.web.RequestHandler): 44 | """Handlers that will prevent the application from exiting until they're done. 45 | """ 46 | def prepare(self): 47 | super(GracefulHandler, self).prepare() 48 | self.application.nb_requests += 1 49 | 50 | def on_finish(self): 51 | super(GracefulHandler, self).on_finish() 52 | 53 | app = self.application 54 | 55 | async def do_decrease(): 56 | async with app.close_condition: 57 | app.nb_requests -= 1 58 | app.close_condition.notify_all() 59 | 60 | log_future(asyncio.get_event_loop().create_task(do_decrease()), logger) 61 | -------------------------------------------------------------------------------- /frontend/src/components/Logo/auctus-logo.min.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /frontend/src/config.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * This function loads variables from HTML tags. It allows us to 3 | * dynamically configure the base path for the application. It can be useful 4 | * when configuring the system to run behind a proxy under a non-root path 5 | * such as: http://example.com/auctus/{all-paths}. 6 | * 7 | * This can be configured by adding an HTML meta-tag to the static HTML. 8 | * 9 | * <meta name="base_url" content="https://example.com/auctus" /> 10 | */ 11 | function loadVariableFromHTML(name: string): string { 12 | const meta = document.getElementsByName(name)[0]; 13 | let value: string | null = meta ? meta.getAttribute('content') : null; 14 | if (!value) { 15 | value = ''; 16 | } else if (value.endsWith('/')) { 17 | value = value.substring(0, value.length - 1); 18 | } 19 | return value; 20 | } 21 | 22 | /* 23 | * During web development, 24 | * - the web server is started via "npm start" and it runs at localhost:3000; 25 | * - the API server should be running at the address configured in the "proxy" 26 | * key from the package.json file in the project's root directory 27 | * 28 | * In the client code, we always send the API requests to the address where the 29 | * page is being served from. In 'development' mode, the create-react-app dev 30 | * server will proxy the requests to the appropriate backend running the REST 31 | * API. In production, the app is already served from the same server where 32 | * the REST API is served, so the requests will work seamlessly. 
33 | */ 34 | 35 | let baseUrl = `//${window.location.host}`; 36 | let apiUrl = baseUrl; 37 | 38 | const isDev = process.env.NODE_ENV === 'development'; 39 | if (isDev && process.env.REACT_APP_BASE_URL) { 40 | baseUrl = process.env.REACT_APP_BASE_URL; 41 | } 42 | if (isDev && process.env.REACT_APP_API_URL) { 43 | apiUrl = process.env.REACT_APP_API_URL; 44 | } 45 | 46 | const BASE_URL: string = loadVariableFromHTML('base_url') || baseUrl; 47 | const API_URL = loadVariableFromHTML('api_url') || apiUrl; 48 | 49 | console.log('BASE_URL', BASE_URL); 50 | console.log('API_URL', API_URL); 51 | 52 | export {BASE_URL, API_URL}; 53 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app). 2 | 3 | ## Available Scripts 4 | 5 | In the project directory, you can run: 6 | 7 | ### `npm start` 8 | 9 | Runs the app in the development mode.
    10 | Open [http://localhost:3000](http://localhost:3000) to view it in the browser. 11 | 12 | The page will reload if you make edits.
    13 | You will also see any lint errors in the console. 14 | 15 | ### `npm test` 16 | 17 | Launches the test runner in the interactive watch mode.
    18 | See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information. 19 | 20 | ### `npm run build` 21 | 22 | Builds the app for production to the `build` folder.
    23 | It correctly bundles React in production mode and optimizes the build for the best performance. 24 | 25 | The build is minified and the filenames include the hashes.
    26 | Your app is ready to be deployed! 27 | 28 | See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information. 29 | 30 | ### `npm run eject` 31 | 32 | **Note: this is a one-way operation. Once you `eject`, you can’t go back!** 33 | 34 | If you aren’t satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project. 35 | 36 | Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you’re on your own. 37 | 38 | You don’t have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn’t feel obligated to use this feature. However we understand that this tool wouldn’t be useful if you couldn’t customize it when you are ready for it. 39 | 40 | ## Learn More 41 | 42 | You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started). 43 | 44 | To learn React, check out the [React documentation](https://reactjs.org/). 45 | -------------------------------------------------------------------------------- /frontend/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 16 | 17 | 18 | 19 | 23 | 24 | 25 | 34 | Auctus 35 | 36 | 37 | 38 |
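Stepping back to apiserver/graceful_shutdown.py shown earlier: a minimal sketch of how the two classes might be wired together (the handler, route, and port here are hypothetical, not taken from the repository):

```python
import tornado.ioloop

from apiserver.graceful_shutdown import GracefulApplication, GracefulHandler


class HealthHandler(GracefulHandler):  # hypothetical handler
    def get(self):
        # prepare()/on_finish() count this request, so a SIGTERM received
        # while it is in flight will wait for it before stopping the loop.
        self.write('ok')


app = GracefulApplication([('/health', HealthHandler)])
app.listen(8002)  # hypothetical port
tornado.ioloop.IOLoop.current().start()
```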
    39 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /scripts/export_all.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """This script exports the index to JSON files. 4 | 5 | It is useful as backup, and to provide snapshots to users so they don't have to 6 | profile everything to get a system going. 7 | 8 | The exported folder can be loaded in using `import_all.py` (which will simply 9 | load the JSON files) or `reprocess_all.py` (which will only read some fields, 10 | and get the metadata by reprocessing the datasets). 11 | """ 12 | 13 | import logging 14 | import json 15 | 16 | from datamart_core.common import PrefixedElasticsearch, encode_dataset_id 17 | 18 | 19 | SIZE = 10000 20 | 21 | 22 | _unique_filenames = {} 23 | 24 | 25 | def unique_filename(pattern): 26 | """Return a file name with an incrementing number to make it unique. 27 | """ 28 | number = _unique_filenames.get(pattern, 0) + 1 29 | _unique_filenames[pattern] = number 30 | return pattern.format(number) 31 | 32 | 33 | def export(): 34 | es = PrefixedElasticsearch() 35 | 36 | print("Dumping datasets") 37 | hits = es.scan( 38 | index='datasets', 39 | query={ 40 | 'query': { 41 | 'match_all': {}, 42 | }, 43 | }, 44 | size=SIZE, 45 | ) 46 | for h in hits: 47 | # Use dataset ID as file name 48 | with open(encode_dataset_id(h['_id']), 'w') as fp: 49 | json.dump(h['_source'], fp, sort_keys=True, indent=2) 50 | 51 | print("Dumping Lazo data") 52 | hits = es.scan( 53 | index='lazo', 54 | query={ 55 | 'query': { 56 | 'match_all': {}, 57 | }, 58 | }, 59 | size=SIZE, 60 | ) 61 | for h in hits: 62 | # Use "lazo." dataset_id ".NB" as file name 63 | dataset_id = h['_id'].split('__.__')[0] 64 | fname = unique_filename( 65 | 'lazo.{0}.{{0}}'.format(encode_dataset_id(dataset_id)) 66 | ) 67 | with open(fname, 'w') as fp: 68 | json.dump( 69 | dict(h['_source'], _id=h['_id']), 70 | fp, 71 | sort_keys=True, 72 | indent=2, 73 | ) 74 | 75 | 76 | if __name__ == '__main__': 77 | logging.basicConfig(level=logging.INFO) 78 | 79 | export() 80 | -------------------------------------------------------------------------------- /tests/data/geo.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import numpy.random 3 | import os 4 | import string 5 | 6 | 7 | SIZE = 50 8 | 9 | 10 | def main(): 11 | lat1, long1 = 40.7298648, -73.9986808 12 | lat1m, long1m = 40.73287, -74.002031 13 | lat2, long2 = 40.692725, -73.9865644 14 | lat2m, long2m = 40.694316, -73.988495 15 | 16 | random = numpy.random.RandomState(1) 17 | latitudes = numpy.concatenate([ 18 | random.normal(lat1, abs(lat1 - lat1m), SIZE), 19 | random.normal(lat2, abs(lat2 - lat2m), SIZE), 20 | ]) 21 | random = numpy.random.RandomState(2) 22 | longitudes = numpy.concatenate([ 23 | random.normal(long1, abs(long1 - long1m), SIZE), 24 | random.normal(long2, abs(long2 - long2m), SIZE), 25 | ]) 26 | random = numpy.random.RandomState(3) 27 | heights = random.normal(50.0, 20.0, 2 * SIZE) 28 | 29 | data_dir = os.path.dirname(__file__) 30 | with open(os.path.join(data_dir, 'geo.csv'), 'w') as f_data: 31 | print("id,lat,long,height", file=f_data) 32 | for i, (lat, long, h) in enumerate(zip(latitudes, longitudes, heights)): 33 | if i == 42: 34 | i = '' 35 | else: 36 | i = 'place%02d' % i 37 | print("%s,%f,%f,%f" % (i, lat, long, h), file=f_data) 38 | 39 | with open(os.path.join(data_dir, 'geo_wkt.csv'), 'w') as f_data: 40 | 
print("id,coords,height", file=f_data) 41 | for i, (lat, long, h) in enumerate(zip(latitudes, longitudes, heights)): 42 | if i == 42: 43 | i = '' 44 | else: 45 | i = 'place%02d' % i 46 | print("%s,POINT (%f %f),%f" % (i, long, lat, h), file=f_data) 47 | 48 | random = numpy.random.RandomState(5) 49 | aug_latitudes = random.normal(lat1, abs(lat1 - lat1m), 10) 50 | aug_longitudes = random.normal(long1, abs(long1 - long1m), 10) 51 | 52 | with open(os.path.join(data_dir, 'geo_aug.csv'), 'w') as f_data: 53 | print("lat,long,id,letter", file=f_data) 54 | for i, (lat, long, letter) in enumerate( 55 | zip(aug_latitudes, aug_longitudes, string.ascii_letters), 56 | 100, 57 | ): 58 | print("%f,%f,place%d,%s" % (lat, long, i, letter), file=f_data) 59 | 60 | 61 | if __name__ == '__main__': 62 | main() 63 | -------------------------------------------------------------------------------- /coordinator/coordinator/templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | {% set fluid = False %} 3 | 4 | 5 | 6 | 7 | 8 | {% block title %}NYU Auctus{% endblock %} 9 | 13 | 14 | 15 | 33 | 34 |
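A quick sanity check for the test data generated by tests/data/geo.py above (a standard-library-only sketch; run it from tests/data after running geo.py):

```python
import csv
import statistics

# geo.py draws two Gaussian clusters of SIZE=50 points each, centered near
# (40.7299, -73.9987) and (40.6927, -73.9866); the sample mean of each half
# of geo.csv should land close to those centers.
with open('geo.csv') as fp:
    rows = list(csv.DictReader(fp))

print(statistics.mean(float(r['lat']) for r in rows[:50]))  # ~40.7299
print(statistics.mean(float(r['lat']) for r in rows[50:]))  # ~40.6927
```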
    35 | {% block contents %}{% endblock %} 36 | 37 | 43 |
 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /lib_materialize/datamart_materialize/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | 5 | class SimpleConverterProxy(object): 6 | def __init__(self, writer, transform, name, temp_file, fp): 7 | self._writer = writer 8 | self._transform = transform 9 | self._name = name 10 | self._temp_file = temp_file 11 | self._fp = fp 12 | 13 | def close(self): 14 | self._fp.close() 15 | self._convert() 16 | 17 | def _convert(self): 18 | # Read back the file we wrote, and transform it to the final file 19 | with self._writer.open_file('w', self._name) as dst: 20 | self._transform(self._temp_file, dst) 21 | 22 | # These methods forward to the actual file object 23 | 24 | def write(self, buffer): 25 | return self._fp.write(buffer) 26 | 27 | def flush(self): 28 | self._fp.flush() 29 | 30 | def __enter__(self): 31 | self._fp.__enter__() 32 | return self 33 | 34 | def __exit__(self, exc, value, tb): 35 | self._fp.__exit__(exc, value, tb) 36 | if exc is None: 37 | self._convert() 38 | 39 | 40 | class SimpleConverter(object): 41 | """Base class for converters simply transforming files through a function. 42 | """ 43 | def __init__(self, writer): 44 | self.writer = writer 45 | self.dir = tempfile.TemporaryDirectory(prefix='datamart_excel_') 46 | 47 | def set_metadata(self, dataset_id, metadata): 48 | self.writer.set_metadata(dataset_id, metadata) 49 | 50 | def open_file(self, mode='wb', name=None): 51 | temp_file = os.path.join(self.dir.name, 'file.xls') 52 | 53 | # Return a proxy that will write to the destination when closed 54 | if mode == 'wb': 55 | fp = open(temp_file, mode) 56 | elif mode == 'w': 57 | fp = open(temp_file, mode, encoding='utf-8', newline='') 58 | else: 59 | raise ValueError("Invalid write mode %r" % mode) 60 | return SimpleConverterProxy( 61 | self.writer, self.transform, 62 | name, 63 | temp_file, fp, 64 | ) 65 | 66 | def finish(self): 67 | self.dir.cleanup() 68 | self.dir = None 69 | 70 | @staticmethod 71 | def transform(source_filename, dest_fileobj): 72 | raise NotImplementedError 73 | -------------------------------------------------------------------------------- /frontend/src/components/visus/PersistentComponent/PersistentComponent.tsx: -------------------------------------------------------------------------------- 1 | import {PureComponent} from 'react'; 2 | import {shallowEqual} from '../../../utils'; 3 | 4 | const cache = new Map(); 5 | 6 | // Patch PureComponent type declaration so that we can access React internal 7 | // variables. We disable eslint here because the declaration has to match 8 | // the declaration from the @types/react package. 9 | declare module 'react' { 10 | // eslint-disable-next-line @typescript-eslint/no-explicit-any 11 | interface PureComponent

    12 | extends React.Component { 13 | _reactInternalFiber: { 14 | key: string; 15 | type: { 16 | displayName: string; 17 | name: string; 18 | }; 19 | }; 20 | } 21 | } 22 | 23 | /** 24 | * This component uses the key provided to the component to generate a cache key for the data. 25 | * We chose to use the key for the following reasons: 26 | * 1. React uses the key to identify the element associated with the component. 27 | * In some cases this helps it to identify that two instances are the same, and to avoid re-constructing the instance. 28 | * It is expected that this strategy will help React avoid destroying a component unnecessarily. 29 | * 2. React does some work to avoid siblings with the same key. This should provide some warnings when reusing a key. 30 | * 3. Since it is internal to each component, it doesn't pollute the props of components. 31 | * 32 | */ 33 | export class PersistentComponent< 34 | TProps = {}, 35 | TState = {} 36 | > extends PureComponent { 37 | componentDidMount() { 38 | if (!this._reactInternalFiber.key) { 39 | console.warn( 40 | 'When using PersistentComponent please provide the key prop' 41 | ); 42 | } 43 | const cacheKey = this.getCacheKey(); 44 | const previousState = cache.get(cacheKey); 45 | if (previousState && !shallowEqual(this.state, previousState)) { 46 | this.setState(previousState); 47 | } 48 | } 49 | 50 | componentWillUnmount() { 51 | const key = this.getCacheKey(); 52 | cache.set(key, this.state); 53 | } 54 | 55 | private getCacheKey() { 56 | const name = 57 | this._reactInternalFiber.type.displayName || 58 | this._reactInternalFiber.type.name; 59 | return `${name}-${this._reactInternalFiber.key}`; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /lib_core/datamart_core/augment.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | import uuid 4 | 5 | from datamart_augmentation import AugmentationError, join, union 6 | 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def augment(data, newdata, metadata, task, writer, columns=None): 12 | """ 13 | Augments original data based on the task. 14 | 15 | :param data: the data to be augmented, as a binary file object. 16 | :param newdata: the path to the CSV file to augment with. 17 | :param metadata: the metadata of the data to be augmented. 18 | :param task: the augmentation task. 19 | :param writer: Writer on which to save the files. 20 | :param columns: a list of column indices from newdata that will be added 21 | to data. 
 22 | """ 23 | 24 | if 'id' not in task: 25 | raise AugmentationError("Dataset id for the augmentation task not provided") 26 | 27 | # TODO: add support for combining multiple columns before an augmentation 28 | # e.g.: [['street number', 'street', 'city']] and [['address']] 29 | # currently, Datamart does not support such cases 30 | # this means that spatial joins (with GPS) are not supported for now 31 | 32 | # Perform augmentation 33 | start = time.perf_counter() 34 | if task['augmentation']['type'] == 'join': 35 | output_metadata = join( 36 | data, 37 | newdata, 38 | metadata, 39 | task['metadata'], 40 | writer, 41 | task['augmentation']['left_columns'], 42 | task['augmentation']['right_columns'], 43 | columns=columns, 44 | agg_functions=task['augmentation'].get('agg_functions'), 45 | temporal_resolution=task['augmentation'].get('temporal_resolution'), 46 | ) 47 | elif task['augmentation']['type'] == 'union': 48 | output_metadata = union( 49 | data, 50 | newdata, 51 | metadata, 52 | task['metadata'], 53 | writer, 54 | task['augmentation']['left_columns'], 55 | task['augmentation']['right_columns'], 56 | ) 57 | else: 58 | raise AugmentationError("Augmentation task not provided") 59 | logger.info("Total augmentation: %.4fs", time.perf_counter() - start) 60 | 61 | # Write out the metadata 62 | writer.set_metadata(uuid.uuid4().hex, output_metadata) 63 | return writer.finish() 64 | -------------------------------------------------------------------------------- /scripts/migrate-types-and-attributes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """This script updates the index for !115 and !127. 4 | 5 | It adds the dataset "types" information (computed from column semantic types) 6 | and the "attribute_keywords" field (computed from column names). 
7 | """ 8 | 9 | import json 10 | import os 11 | import shutil 12 | import sys 13 | 14 | from datamart_profiler.core import expand_attribute_name 15 | from datamart_profiler.profile_types import determine_dataset_type 16 | 17 | 18 | def migrate(from_folder, to_folder): 19 | assert os.listdir(from_folder) 20 | assert not os.listdir(to_folder) 21 | 22 | datasets = [] 23 | lazo = [] 24 | for f in os.listdir(from_folder): 25 | if f.startswith('lazo.'): 26 | lazo.append(f) 27 | else: 28 | datasets.append(f) 29 | 30 | for i, dataset in enumerate(datasets): 31 | if i % 100 == 0: 32 | print("% 5d / %5d datasets processed" % (i, len(datasets))) 33 | 34 | with open(os.path.join(from_folder, dataset)) as fp: 35 | obj = json.load(fp) 36 | 37 | if 'attribute_keywords' not in obj: 38 | attribute_keywords = [] 39 | for col in obj['columns']: 40 | attribute_keywords.append(col['name']) 41 | kw = list(expand_attribute_name(col['name'])) 42 | if kw != [col['name']]: 43 | attribute_keywords.extend(kw) 44 | obj['attribute_keywords'] = attribute_keywords 45 | 46 | if 'types' not in obj: 47 | dataset_types = set() 48 | for col in obj['columns']: 49 | type_ = determine_dataset_type( 50 | col['structural_type'], 51 | col['semantic_types'], 52 | ) 53 | if type_: 54 | dataset_types.add(type_) 55 | obj['types'] = sorted(dataset_types) 56 | 57 | with open(os.path.join(to_folder, dataset), 'w') as fp: 58 | json.dump(obj, fp, sort_keys=True, indent=2) 59 | 60 | print("Copying lazo data...") 61 | for i, f in enumerate(lazo): 62 | if i % 1000 == 0: 63 | print("% 5d / %5d files copied" % (i, len(lazo))) 64 | shutil.copy2( 65 | os.path.join(from_folder, f), 66 | os.path.join(to_folder, f), 67 | ) 68 | 69 | 70 | if __name__ == '__main__': 71 | migrate(sys.argv[1], sys.argv[2]) 72 | -------------------------------------------------------------------------------- /frontend/src/components/visus/Card/Card.tsx: -------------------------------------------------------------------------------- 1 | import * as React from 'react'; 2 | import styled from 'styled-components'; 3 | import './card.css'; 4 | 5 | interface CardProps { 6 | title: string; 7 | className?: string; 8 | style?: React.CSSProperties; 9 | } 10 | 11 | class Card extends React.PureComponent { 12 | render() { 13 | const cardClassName = this.props.className 14 | ? 'card ' + this.props.className 15 | : 'card'; 16 | return ( 17 |

    18 |
    19 | {this.props.title ? ( 20 |
    {this.props.title}
    21 | ) : ( 22 | '' 23 | )} 24 | {this.props.children} 25 |
    26 |
    27 | ); 28 | } 29 | } 30 | 31 | interface CardShadowProps { 32 | className?: string; 33 | height?: string; 34 | } 35 | 36 | class CardShadow extends React.PureComponent { 37 | render() { 38 | const cardClassName = this.props.className 39 | ? 'card-hover card card-attributes ' + this.props.className 40 | : 'card-hover card card-attributes'; 41 | return ( 42 |
    50 |
    {this.props.children}
    51 |
    52 | ); 53 | } 54 | } 55 | 56 | interface CardAttrFieldProps { 57 | textAlign?: string; 58 | width?: string; 59 | fontWeight?: string; 60 | padding?: string; 61 | } 62 | 63 | const CardAttrField = styled.div` 64 | font-weight: ${({fontWeight}) => fontWeight || 'normal'}; 65 | text-align: ${({textAlign}) => textAlign || 'right'}; 66 | width: ${({width}) => width || '110px'}; 67 | padding: ${({padding}) => padding || '0 15px'}; 68 | `; 69 | 70 | const CardAttrValue = styled.div` 71 | flex: 1; 72 | padding-right: 15px; 73 | overflow-wrap: break-word; 74 | word-wrap: break-word; 75 | word-break: break-word; 76 | `; 77 | 78 | export const CardButton = styled.div` 79 | display: flex; 80 | justify-content: center; 81 | flex-direction: column; 82 | text-align: center; 83 | height: 100%; 84 | cursor: pointer; 85 | `; 86 | 87 | export {Card, CardShadow, CardAttrField, CardAttrValue}; 88 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend", 3 | "version": "0.1.0", 4 | "private": true, 5 | "dependencies": { 6 | "@hanreev/types-ol": "^2.0.8", 7 | "@material-ui/core": "^4.11.1", 8 | "@testing-library/jest-dom": "^4.2.4", 9 | "@testing-library/react": "^9.4.1", 10 | "@testing-library/user-event": "^7.2.1", 11 | "@types/d3-scale": "^3.2.2", 12 | "@types/jest": "^24.9.1", 13 | "@types/node": "^12.12.29", 14 | "@types/ol": "^5.3.7", 15 | "@types/react": "^16.9.23", 16 | "@types/react-dom": "^16.9.5", 17 | "@types/react-router-dom": "^5.1.3", 18 | "@types/react-table": "^7.0.10", 19 | "@types/styled-components": "^5.0.1", 20 | "axios": "^0.21.4", 21 | "d3-scale": "^3.2.4", 22 | "moment": "^2.24.0", 23 | "ol": "^5.3.3", 24 | "react": "^16.13.1", 25 | "react-datepicker": "^2.13.0", 26 | "react-dnd": "^10.0.2", 27 | "react-dnd-html5-backend": "^10.0.2", 28 | "react-dom": "^16.13.0", 29 | "react-dropzone": "^10.2.1", 30 | "react-feather": "^2.0.3", 31 | "react-router-dom": "^5.1.2", 32 | "react-scripts": "4.0.3", 33 | "react-table": "^7.0.0", 34 | "react-vega": "^7.3.0", 35 | "styled-components": "^5.0.1", 36 | "vega": "^5.11.1", 37 | "vega-lite": "^4.12.0" 38 | }, 39 | "scripts": { 40 | "start": "react-scripts start", 41 | "build": "react-scripts build", 42 | "test": "react-scripts test", 43 | "eject": "react-scripts eject", 44 | "check": "gts check", 45 | "clean": "gts clean", 46 | "compile": "tsc -p .", 47 | "fix": "gts fix", 48 | "pretest": "npm run compile", 49 | "posttest": "npm run check" 50 | }, 51 | "eslintConfig": { 52 | "extends": "react-app" 53 | }, 54 | "browserslist": { 55 | "production": [ 56 | ">0.2%", 57 | "not dead", 58 | "not op_mini all" 59 | ], 60 | "development": [ 61 | "last 1 chrome version", 62 | "last 1 firefox version", 63 | "last 1 safari version" 64 | ] 65 | }, 66 | "devDependencies": { 67 | "@types/node": "^12.12.29", 68 | "@types/react-datepicker": "^2.11.0", 69 | "canvas": "^2.8.0", 70 | "eslint-plugin-react": "^7.20.6", 71 | "gts": "2.0.2", 72 | "jest-canvas-mock": "^2.2.0", 73 | "typescript": "^3.9.10" 74 | }, 75 | "jest": { 76 | "transformIgnorePatterns": [ 77 | "/node_modules/(?!ol).+\\.js$" 78 | ] 79 | }, 80 | "proxy": "https://auctus.vida-nyu.org/api/v1" 81 | } 82 | -------------------------------------------------------------------------------- /scripts/migrate-temporal-coverage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """This 
script updates the index for !162. 4 | 5 | It creates the 'temporal_coverage' field (in the 'datasets' index) and the 6 | 'temporal_coverage' index. 7 | """ 8 | 9 | import json 10 | import os 11 | import shutil 12 | import sys 13 | 14 | 15 | def migrate(from_folder, to_folder): 16 | assert os.listdir(from_folder) 17 | assert not os.listdir(to_folder) 18 | 19 | datasets = [] 20 | lazo = [] 21 | for f in os.listdir(from_folder): 22 | if f.startswith('lazo.'): 23 | lazo.append(f) 24 | else: 25 | datasets.append(f) 26 | 27 | for i, dataset in enumerate(datasets): 28 | if i % 100 == 0: 29 | print("% 5d / %5d datasets processed" % (i, len(datasets))) 30 | 31 | with open(os.path.join(from_folder, dataset)) as fp: 32 | obj = json.load(fp) 33 | 34 | if 'temporal_coverage' not in obj: 35 | temporal_coverage = [] 36 | for idx, column in enumerate(obj['columns']): 37 | if 'http://schema.org/DateTime' in column['semantic_types']: 38 | coverage = { 39 | 'type': 'datetime', 40 | 'column_names': [column['name']], 41 | 'column_indexes': [idx], 42 | 'column_types': ['http://schema.org/DateTime'], 43 | 'ranges': column.pop('coverage'), 44 | } 45 | column.pop('mean', None) 46 | column.pop('stddev', None) 47 | if 'temporal_resolution' in column: 48 | coverage['temporal_resolution'] = \ 49 | column.pop('temporal_resolution') 50 | temporal_coverage.append(coverage) 51 | 52 | if temporal_coverage: 53 | obj['temporal_coverage'] = temporal_coverage 54 | 55 | with open(os.path.join(to_folder, dataset), 'w') as fp: 56 | json.dump(obj, fp, sort_keys=True, indent=2) 57 | 58 | print("Copying lazo data...") 59 | for i, f in enumerate(lazo): 60 | if i % 1000 == 0: 61 | print("% 5d / %5d files copied" % (i, len(lazo))) 62 | shutil.copy2( 63 | os.path.join(from_folder, f), 64 | os.path.join(to_folder, f), 65 | ) 66 | 67 | 68 | if __name__ == '__main__': 69 | migrate(sys.argv[1], sys.argv[2]) 70 | -------------------------------------------------------------------------------- /contrib/k8s/discovery/worldbank.libsonnet: -------------------------------------------------------------------------------- 1 | local utils = import '../utils.libsonnet'; 2 | 3 | function( 4 | config, 5 | schedule='0 1 * * 1,3,5', 6 | ) { 7 | 'worldbank-cronjob': config.kube('batch/v1beta1', 'CronJob', { 8 | file:: 'discovery.yml', 9 | metadata: { 10 | name: 'worldbank', 11 | labels: { 12 | app: 'auctus', 13 | what: 'worldbank', 14 | }, 15 | }, 16 | spec: { 17 | schedule: schedule, 18 | jobTemplate: { 19 | metadata: { 20 | labels: { 21 | app: 'auctus', 22 | what: 'worldbank', 23 | }, 24 | }, 25 | spec: { 26 | template: { 27 | metadata: { 28 | labels: { 29 | app: 'auctus', 30 | what: 'worldbank', 31 | }, 32 | }, 33 | spec: { 34 | restartPolicy: 'Never', 35 | securityContext: { 36 | runAsNonRoot: true, 37 | }, 38 | containers: [ 39 | { 40 | name: 'worldbank', 41 | image: config.image, 42 | imagePullPolicy: 'IfNotPresent', 43 | args: ['python', '-m', 'worldbank_discovery'], 44 | env: utils.env( 45 | { 46 | LOG_FORMAT: config.log_format, 47 | ELASTICSEARCH_HOSTS: 'elasticsearch:9200', 48 | ELASTICSEARCH_PREFIX: config.elasticsearch.prefix, 49 | AMQP_HOST: 'rabbitmq', 50 | AMQP_PORT: '5672', 51 | AMQP_USER: { 52 | secretKeyRef: { 53 | name: 'secrets', 54 | key: 'amqp.user', 55 | }, 56 | }, 57 | AMQP_PASSWORD: { 58 | secretKeyRef: { 59 | name: 'secrets', 60 | key: 'amqp.password', 61 | }, 62 | }, 63 | LAZO_SERVER_HOST: 'lazo', 64 | LAZO_SERVER_PORT: '50051', 65 | } 66 | + utils.object_store_env(config.object_store) 67 | ), 68 | }, 69 | ], 70 | }, 71 | }, 72 | }, 73 
| }, 74 | }, 75 | }), 76 | } 77 | -------------------------------------------------------------------------------- /contrib/k8s/discovery/uaz-indicators.libsonnet: -------------------------------------------------------------------------------- 1 | local utils = import '../utils.libsonnet'; 2 | 3 | function( 4 | config, 5 | schedule='20 1 * * 1,3,5', 6 | ) { 7 | 'uaz-indicators-cronjob': config.kube('batch/v1beta1', 'CronJob', { 8 | file:: 'discovery.yml', 9 | metadata: { 10 | name: 'uaz-indicators', 11 | labels: { 12 | app: 'auctus', 13 | what: 'uaz-indicators', 14 | }, 15 | }, 16 | spec: { 17 | schedule: schedule, 18 | jobTemplate: { 19 | metadata: { 20 | labels: { 21 | app: 'auctus', 22 | what: 'uaz-indicators', 23 | }, 24 | }, 25 | spec: { 26 | template: { 27 | metadata: { 28 | labels: { 29 | app: 'auctus', 30 | what: 'uaz-indicators', 31 | }, 32 | }, 33 | spec: { 34 | restartPolicy: 'Never', 35 | securityContext: { 36 | runAsNonRoot: true, 37 | }, 38 | containers: [ 39 | { 40 | name: 'uaz-indicators', 41 | image: config.image, 42 | imagePullPolicy: 'IfNotPresent', 43 | args: ['python', '-m', 'uaz_indicators'], 44 | env: utils.env( 45 | { 46 | LOG_FORMAT: config.log_format, 47 | ELASTICSEARCH_HOSTS: 'elasticsearch:9200', 48 | ELASTICSEARCH_PREFIX: config.elasticsearch.prefix, 49 | AMQP_HOST: 'rabbitmq', 50 | AMQP_PORT: '5672', 51 | AMQP_USER: { 52 | secretKeyRef: { 53 | name: 'secrets', 54 | key: 'amqp.user', 55 | }, 56 | }, 57 | AMQP_PASSWORD: { 58 | secretKeyRef: { 59 | name: 'secrets', 60 | key: 'amqp.password', 61 | }, 62 | }, 63 | LAZO_SERVER_HOST: 'lazo', 64 | LAZO_SERVER_PORT: '50051', 65 | } 66 | + utils.object_store_env(config.object_store) 67 | ), 68 | }, 69 | ], 70 | }, 71 | }, 72 | }, 73 | }, 74 | }, 75 | }), 76 | } 77 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "auctus" 3 | version = "0.10" 4 | description = "Auctus system meta-package" 5 | 6 | license = "Apache-2.0" 7 | 8 | authors = [ 9 | "Remi Rampin ", 10 | ] 11 | 12 | repository = "https://gitlab.com/ViDA-NYU/auctus/auctus" 13 | homepage = "https://auctus.vida-nyu.org/" 14 | 15 | keywords = ["auctus", "datamart"] 16 | 17 | classifiers = [ 18 | "Development Status :: 4 - Beta", 19 | "Intended Audience :: Science/Research", 20 | "Operating System :: Unix", 21 | "Programming Language :: Python :: 3 :: Only", 22 | "Topic :: Scientific/Engineering :: Information Analysis", 23 | ] 24 | 25 | packages = [] 26 | 27 | include = [] 28 | 29 | [tool.poetry.dependencies] 30 | python = "^3.8,<3.11" # Upper bound for numpy 31 | datamart-profiler = {path = "./lib_profiler", develop=true} 32 | datamart-materialize = {path = "./lib_materialize", develop=true} 33 | datamart-augmentation = {path = "./lib_augmentation", develop=true} 34 | datamart-geo = {path = "lib_geo", develop=true} 35 | datamart-core = {path = "./lib_core", develop=true} 36 | datamart-fslock = {path = "./lib_fslock", develop=true} 37 | datamart-coordinator-service = {path = "./coordinator", develop=true} 38 | datamart-profiler-service = {path = "./profiler", develop=true} 39 | datamart-api-service = {path = "./apiserver", develop=true} 40 | datamart-cache-cleaner-service = {path = "cache_cleaner", develop=true} 41 | datamart-snapshotter-service = {path = "snapshotter", develop=true} 42 | datamart-noaa-discovery-service = {path = "./discovery/noaa", develop=true} 43 | datamart-socrata-discovery-service = 
{path = "./discovery/socrata", develop=true} 44 | datamart-zenodo-discovery-service = {path = "./discovery/zenodo", develop=true} 45 | datamart-ckan-discovery-service = {path = "./discovery/ckan", develop=true} 46 | datamart-uaz-indicators-service = {path = "./discovery/uaz_indicators", develop=true} 47 | datamart-worldbank-discovery-service = {path= "./discovery/worldbank", develop=true} 48 | datamart-isi-discovery-service = {path= "./discovery/isi", develop=true} 49 | toml = "*" 50 | opentelemetry-exporter-jaeger-thrift = "*" 51 | 52 | [tool.poetry.dev-dependencies] 53 | flake8 = "*" 54 | PyYaml = "*" 55 | requests = "*" 56 | coverage = "*" # Keep it in sync with Dockerfiles for CI 57 | jsonschema = ">=3.0,<4" 58 | readme_renderer = "*" 59 | Sphinx = "*" 60 | sphinx-rtd-theme = "^0.5.0" 61 | 62 | [build-system] 63 | requires = ["poetry-core>=1.0.0"] 64 | build-backend = "poetry.core.masonry.api" 65 | --------------------------------------------------------------------------------
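As a closing aside, the lockfile-to-pip mapping implemented by docker/install_deps.py earlier in this dump can be exercised in isolation. A self-contained sketch (the sample entries below are made up for illustration, not taken from the real poetry.lock):

```python
# Mirrors the name==version / git+url@ref logic of install_deps.py above.
def requirement(package):
    source = package.get('source')
    if source is None:
        return '%s==%s' % (package['name'], package['version'])
    if source['type'] == 'git':
        return 'git+%s@%s' % (source['url'], source['reference'])
    if source['type'] == 'url':
        return source['url']
    if source['type'] == 'directory':
        return None  # directory dependencies are skipped by install_deps.py
    raise ValueError("Unknown package source %s" % source['type'])


assert requirement({'name': 'toml', 'version': '0.10.2'}) == 'toml==0.10.2'
assert requirement({
    'name': 'example', 'version': '0',
    'source': {'type': 'git', 'url': 'https://example.com/x.git',
               'reference': 'main'},
}) == 'git+https://example.com/x.git@main'
```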