├── .eslintignore ├── fe ├── .npmrc ├── src │ ├── routes │ │ ├── +layout.js │ │ ├── _datasets │ │ │ ├── +page.svelte │ │ │ └── +layout.svelte │ │ ├── +error.svelte │ │ ├── +page.svelte │ │ └── +layout.svelte │ ├── lib │ │ ├── app │ │ │ ├── components │ │ │ │ ├── elementary │ │ │ │ │ ├── DateTimeField.svelte │ │ │ │ │ ├── MenuItem.svelte │ │ │ │ │ ├── SelectMenu.svelte │ │ │ │ │ ├── AutoComplete.svelte │ │ │ │ │ ├── Tab.svelte │ │ │ │ │ ├── Collapsible.svelte │ │ │ │ │ ├── TabContainer.svelte │ │ │ │ │ ├── RadioList.svelte │ │ │ │ │ ├── BooleanRadios.svelte │ │ │ │ │ ├── ObjectEditor.svelte │ │ │ │ │ ├── PanelMenu.svelte │ │ │ │ │ ├── Select.svelte │ │ │ │ │ └── TypedField.svelte │ │ │ │ ├── explore │ │ │ │ │ ├── viz │ │ │ │ │ │ ├── displays │ │ │ │ │ │ │ ├── Location.svelte │ │ │ │ │ │ │ ├── ValueDisplay.svelte │ │ │ │ │ │ │ ├── ObjectDisplay.svelte │ │ │ │ │ │ │ ├── AggResultView.svelte │ │ │ │ │ │ │ └── LevelDisplay.svelte │ │ │ │ │ │ └── AggResults.svelte │ │ │ │ │ ├── coverage │ │ │ │ │ │ └── utils.js │ │ │ │ │ └── suggestions │ │ │ │ │ │ └── FieldMenu.svelte │ │ │ │ ├── AggSelector.svelte │ │ │ │ └── svizzle │ │ │ │ │ └── InputWidget.svelte │ │ │ ├── utils │ │ │ │ ├── version.js │ │ │ │ ├── events.js │ │ │ │ ├── data.js │ │ │ │ ├── data.spec.js │ │ │ │ └── net.js │ │ │ ├── machines │ │ │ │ ├── explore │ │ │ │ │ ├── history.context.js │ │ │ │ │ ├── route.context.js │ │ │ │ │ ├── README.md │ │ │ │ │ ├── results.context.js │ │ │ │ │ ├── history.config.js │ │ │ │ │ ├── search.context.js │ │ │ │ │ ├── history.options.js │ │ │ │ │ ├── selecting.config.js │ │ │ │ │ ├── route.config.js │ │ │ │ │ ├── selecting.options.js │ │ │ │ │ ├── selecting.context.js │ │ │ │ │ ├── route.js │ │ │ │ │ ├── route.options.js │ │ │ │ │ └── results.config.js │ │ │ │ ├── builder │ │ │ │ │ ├── tester.js │ │ │ │ │ ├── form.js │ │ │ │ │ ├── history.config.js │ │ │ │ │ ├── docs.config.js │ │ │ │ │ ├── README.md │ │ │ │ │ ├── route.js │ │ │ │ │ ├── route.config.js │ │ │ │ │ ├── 
formediting.context.js │ │ │ │ │ ├── docs.options.js │ │ │ │ │ ├── form.context.js │ │ │ │ │ ├── tester.config.js │ │ │ │ │ ├── tester.options.js │ │ │ │ │ ├── formediting.config.js │ │ │ │ │ ├── history.options.js │ │ │ │ │ └── history.test.md │ │ │ │ └── utils.js │ │ │ └── stores │ │ │ │ └── auth.js │ │ ├── elasticsearch │ │ │ ├── aggs │ │ │ │ ├── spec │ │ │ │ │ ├── global.todo.js │ │ │ │ │ ├── parent.todo.js │ │ │ │ │ ├── matrix_stats.todo.js │ │ │ │ │ ├── filter.todo.js │ │ │ │ │ ├── composite.todo.js │ │ │ │ │ ├── children.todo.js │ │ │ │ │ ├── filters.todo.js │ │ │ │ │ ├── adjacency_matrix.todo.js │ │ │ │ │ ├── nested.todo.js │ │ │ │ │ ├── geo_centroid.js │ │ │ │ │ ├── sampler.todo.js │ │ │ │ │ ├── missing.js │ │ │ │ │ ├── value_count.js │ │ │ │ │ ├── min.js │ │ │ │ │ ├── avg.js │ │ │ │ │ ├── max.js │ │ │ │ │ ├── reverse_nested.todo.js │ │ │ │ │ ├── geo_bounds.js │ │ │ │ │ ├── sum.js │ │ │ │ │ ├── stats.js │ │ │ │ │ ├── string_stats.js │ │ │ │ │ ├── boxplot.js │ │ │ │ │ ├── extended_stats.js │ │ │ │ │ ├── range.js │ │ │ │ │ ├── cardinality.js │ │ │ │ │ ├── median_absolute_deviation.js │ │ │ │ │ ├── rate.js │ │ │ │ │ ├── percentile_ranks.js │ │ │ │ │ ├── scripted_metric.js │ │ │ │ │ ├── top_hits.js │ │ │ │ │ ├── rare_terms.js │ │ │ │ │ ├── geo_distance.js │ │ │ │ │ ├── percentiles.js │ │ │ │ │ ├── geohash_grid.js │ │ │ │ │ ├── weighted_avg.js │ │ │ │ │ ├── t_test.js │ │ │ │ │ ├── index.js │ │ │ │ │ ├── diversified_sampler.todo.js │ │ │ │ │ ├── variable_width_histogram.js │ │ │ │ │ └── histogram.js │ │ │ │ ├── response │ │ │ │ │ ├── docCount.js │ │ │ │ │ ├── bucketsDocCount.js │ │ │ │ │ ├── value.js │ │ │ │ │ ├── geoCentroid.js │ │ │ │ │ ├── geoBounds.js │ │ │ │ │ ├── stats.js │ │ │ │ │ ├── bucketsGeohashGrid.js │ │ │ │ │ ├── boxplot.js │ │ │ │ │ ├── bucketsNumberAuto.js │ │ │ │ │ ├── bucketsGeotileGrid.js │ │ │ │ │ ├── bucketsTerms.js │ │ │ │ │ ├── numToNum.js │ │ │ │ │ ├── stringStats.js │ │ │ │ │ ├── bucketsDate.js │ │ │ │ │ ├── topMetrics.js │ │ │ │ │ ├── 
bucketsRange.js │ │ │ │ │ ├── bucketsTextScore.js │ │ │ │ │ ├── bucketsDateRange.js │ │ │ │ │ ├── hits.js │ │ │ │ │ └── extendedStats.js │ │ │ │ ├── test │ │ │ │ │ ├── TODO.md │ │ │ │ │ └── check_exports.spec.js │ │ │ │ ├── utils │ │ │ │ │ ├── suggestions.js │ │ │ │ │ ├── version.js │ │ │ │ │ ├── version.spec.js │ │ │ │ │ └── query.js │ │ │ │ ├── bin │ │ │ │ │ └── makeAggToResponseId.js │ │ │ │ ├── ref │ │ │ │ │ ├── aggToResponseType.js │ │ │ │ │ └── typeGroups.js │ │ │ │ └── README.md │ │ │ ├── types │ │ │ │ ├── response.js │ │ │ │ ├── aggs.utils.js │ │ │ │ ├── params.utils.js │ │ │ │ ├── aggs.utils.spec.js │ │ │ │ └── fields.utils.js │ │ │ ├── utils │ │ │ │ ├── docs.js │ │ │ │ ├── coverage.js │ │ │ │ ├── aggParams.js │ │ │ │ └── aggQuery.js │ │ │ └── config │ │ │ │ └── aggsLabels.js │ │ ├── utils │ │ │ ├── svizzle │ │ │ │ └── utils │ │ │ │ │ ├── collection-object.js │ │ │ │ │ ├── array-[any-any].js │ │ │ │ │ ├── [any-any]-[any-any].js │ │ │ │ │ ├── [any-boolean]-[array-boolean].js │ │ │ │ │ ├── obj-[string-any].js │ │ │ │ │ ├── obj-string.js │ │ │ │ │ └── [any-array]-[array-object].js │ │ │ ├── ids.js │ │ │ ├── generic.js │ │ │ ├── net.js │ │ │ └── specs.js │ │ ├── env.js │ │ └── types │ │ │ └── utils.js │ ├── app.html │ └── bin │ │ └── machines │ │ ├── builder_copy_route.js │ │ ├── explore_copy_route.js │ │ └── builder_copy_form.js ├── static │ ├── favicon.png │ ├── logo-192.png │ ├── logo-512.png │ ├── font │ │ ├── OpenSans-Bold.ttf │ │ ├── OpenSans-Light.ttf │ │ ├── OpenSans-Regular.ttf │ │ └── OpenSans-SemiBold.ttf │ └── manifest.json ├── .eslintignore ├── .gitignore ├── vite.config.js ├── jsconfig.json ├── svelte.config.js └── .eslintrc.yml ├── be ├── .gitignore ├── Caddyfile ├── src │ ├── hash.js │ ├── conf.js │ ├── db.js │ ├── bin │ │ └── clearCache.js │ ├── coverage.js │ └── hooks.js ├── README.md ├── package.json └── docker-compose.yml ├── .gitignore ├── specs ├── responses │ └── examples │ │ ├── min │ │ ├── response.json │ │ └── query.txt │ │ ├── sum │ 
│ ├── response.json │ │ └── query.txt │ │ ├── cardinality │ │ ├── response.json │ │ └── query.txt │ │ ├── max │ │ ├── response.json │ │ └── query.txt │ │ ├── avg │ │ ├── response.json │ │ └── query.txt │ │ ├── missing │ │ ├── response.json │ │ └── query.txt │ │ ├── value_count │ │ ├── response.json │ │ └── query.txt │ │ ├── weighted_avg │ │ ├── response.json │ │ └── query.txt │ │ ├── top_hits │ │ └── query.txt │ │ ├── stats │ │ ├── query.txt │ │ └── response.json │ │ ├── geo_bounds │ │ ├── query.txt │ │ └── response.json │ │ ├── geo_centroid │ │ ├── response.json │ │ └── query.txt │ │ ├── percentiles │ │ ├── query.txt │ │ └── response.json │ │ ├── extended_stats │ │ ├── query.txt │ │ └── response.json │ │ ├── significant_text │ │ ├── query.txt │ │ └── response.json │ │ ├── histogram │ │ └── query.txt │ │ ├── date_histogram │ │ └── query.txt │ │ └── terms │ │ ├── query.txt │ │ └── response.json └── indices │ ├── hpmt_epc_v0.yaml │ └── types │ └── type_templates.yaml ├── README.md ├── .vscode └── settings.json ├── netlify.toml ├── package.json ├── githooks ├── post-receive └── README.md ├── CHANGELOG.md ├── .github └── workflows │ └── ci_cd.yml └── LICENSE /.eslintignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /fe/.npmrc: -------------------------------------------------------------------------------- 1 | engine-strict=true 2 | -------------------------------------------------------------------------------- /be/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .env -------------------------------------------------------------------------------- /fe/src/routes/+layout.js: -------------------------------------------------------------------------------- 1 | export const prerender = true; 2 | -------------------------------------------------------------------------------- 
/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .vscode 3 | /node_modules 4 | be/.env 5 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/elementary/DateTimeField.svelte: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/elementary/MenuItem.svelte: -------------------------------------------------------------------------------- 1 |
  • 2 | 3 |
  • 4 | -------------------------------------------------------------------------------- /fe/src/lib/app/utils/version.js: -------------------------------------------------------------------------------- 1 | export { version } from '../../../../../package.json'; 2 | -------------------------------------------------------------------------------- /specs/responses/examples/min/response.json: -------------------------------------------------------------------------------- 1 | { 2 | "primary": { 3 | "value": 3 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dapsboard 2 | 3 | A web app to visualise the data hosted in the Nesta DAPS platform. 4 | -------------------------------------------------------------------------------- /specs/responses/examples/sum/response.json: -------------------------------------------------------------------------------- 1 | { 2 | "primary": { 3 | "value": 11193729 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /fe/static/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindrones/nestauk-dapsboard/dev/fe/static/favicon.png -------------------------------------------------------------------------------- /fe/static/logo-192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindrones/nestauk-dapsboard/dev/fe/static/logo-192.png -------------------------------------------------------------------------------- /fe/static/logo-512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindrones/nestauk-dapsboard/dev/fe/static/logo-512.png -------------------------------------------------------------------------------- 
/specs/responses/examples/cardinality/response.json: -------------------------------------------------------------------------------- 1 | { 2 | "primary": { 3 | "value": 115 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /specs/responses/examples/max/response.json: -------------------------------------------------------------------------------- 1 | { 2 | "primary": { 3 | "value": 30079814466 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /specs/responses/examples/avg/response.json: -------------------------------------------------------------------------------- 1 | { 2 | "primary": { 3 | "value": 741707.0497661051 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /specs/responses/examples/missing/response.json: -------------------------------------------------------------------------------- 1 | { 2 | "primary": { 3 | "doc_count": 357361 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /specs/responses/examples/value_count/response.json: -------------------------------------------------------------------------------- 1 | { 2 | "primary": { 3 | "value": 507506 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "eslint.workingDirectories": [ 3 | "./be", 4 | "./fe" 5 | ] 6 | } -------------------------------------------------------------------------------- /specs/responses/examples/weighted_avg/response.json: -------------------------------------------------------------------------------- 1 | { 2 | "primary": { 3 | "value": 734632.3710127474 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/global.todo.js: 
-------------------------------------------------------------------------------- 1 | // filter agg 2 | // /search-aggregations-bucket-global-aggregation.html 3 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/parent.todo.js: -------------------------------------------------------------------------------- 1 | // filter agg 2 | // /search-aggregations-bucket-parent-aggregation.html 3 | -------------------------------------------------------------------------------- /fe/static/font/OpenSans-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindrones/nestauk-dapsboard/dev/fe/static/font/OpenSans-Bold.ttf -------------------------------------------------------------------------------- /fe/static/font/OpenSans-Light.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindrones/nestauk-dapsboard/dev/fe/static/font/OpenSans-Light.ttf -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/matrix_stats.todo.js: -------------------------------------------------------------------------------- 1 | // multi-field 2 | // /search-aggregations-matrix-stats-aggregation.html 3 | -------------------------------------------------------------------------------- /fe/static/font/OpenSans-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindrones/nestauk-dapsboard/dev/fe/static/font/OpenSans-Regular.ttf -------------------------------------------------------------------------------- /fe/static/font/OpenSans-SemiBold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindrones/nestauk-dapsboard/dev/fe/static/font/OpenSans-SemiBold.ttf 
-------------------------------------------------------------------------------- /fe/.eslintignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .env 3 | .env.* 4 | !.env.example 5 | /.svelte-kit 6 | /build 7 | /node_modules 8 | /package 9 | package-lock.json 10 | -------------------------------------------------------------------------------- /specs/responses/examples/top_hits/query.txt: -------------------------------------------------------------------------------- 1 | TODO (see https://www.elastic.co/guide/en/elasticsearch/reference/7.10/search-aggregations-metrics-top-hits-aggregation.html) 2 | -------------------------------------------------------------------------------- /be/Caddyfile: -------------------------------------------------------------------------------- 1 | dapsboard.cache.dev.dap-tools.uk { 2 | reverse_proxy localhost:4000 3 | } 4 | 5 | dapsboard.cache.prod.dap-tools.uk { 6 | reverse_proxy localhost:4000 7 | } -------------------------------------------------------------------------------- /fe/src/lib/app/machines/explore/history.context.js: -------------------------------------------------------------------------------- 1 | import {writable} from 'svelte/store'; 2 | 3 | export const createHistoryStores = () => ({ 4 | currentURL: writable(), 5 | }); 6 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/filter.todo.js: -------------------------------------------------------------------------------- 1 | // filter agg 2 | // /search-aggregations-bucket-filter-aggregation.html 3 | 4 | // filter: { 5 | // filter: object 6 | // }, 7 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/types/response.js: -------------------------------------------------------------------------------- 1 | import {string} from '$lib/types/index.js'; 2 | 3 | export const zoomXYString = { 4 | 
...string, 5 | regex: /\d+\/\d+\/\d+/gu 6 | }; 7 | -------------------------------------------------------------------------------- /fe/src/routes/_datasets/+page.svelte: -------------------------------------------------------------------------------- 1 | 2 | Dapsboard - datasets 3 | 4 | 5 |

    Please select a dataset.

    6 | 7 | 8 | -------------------------------------------------------------------------------- /netlify.toml: -------------------------------------------------------------------------------- 1 | [build] 2 | command = "cd fe && npm i && npm run deploy" 3 | publish = "fe/build" 4 | 5 | [build.environment] 6 | NODE_VERSION = "18.13.0" 7 | ADAPTER = "netlify" 8 | -------------------------------------------------------------------------------- /specs/responses/examples/avg/query.txt: -------------------------------------------------------------------------------- 1 | /builder?q=(dataset:mosaic_nih_v7,form:primary,forms:!((name:primary,selection:(aggregation:avg,field:cost_total_project,type:integer))),size:0) 2 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/composite.todo.js: -------------------------------------------------------------------------------- 1 | // multi-field 2 | // https://www.elastic.co/guide/en/elasticsearch/reference/7.9/search-aggregations-bucket-composite-aggregation.html 3 | -------------------------------------------------------------------------------- /fe/src/lib/utils/svizzle/utils/collection-object.js: -------------------------------------------------------------------------------- 1 | import * as _ from 'lamb'; 2 | 3 | export const collectionToObject = 4 | collection => _.fromPairs(Array.from(collection.entries())); 5 | -------------------------------------------------------------------------------- /fe/src/lib/utils/svizzle/utils/array-[any-any].js: -------------------------------------------------------------------------------- 1 | /** 2 | * @module @svizzle/utils/array-[any-any] 3 | */ 4 | 5 | import {list, pipe} from 'lamb'; 6 | 7 | export const ψ = pipe([list, pipe]); 8 | -------------------------------------------------------------------------------- /specs/responses/examples/sum/query.txt: 
-------------------------------------------------------------------------------- 1 | /builder?q=(dataset:eurito_patstat_v0,form:primary,forms:!((name:primary,selection:(aggregation:sum,field:count_citations_patent,type:integer))),size:0) 2 | -------------------------------------------------------------------------------- /be/src/hash.js: -------------------------------------------------------------------------------- 1 | import crypto from 'crypto'; 2 | 3 | export const hash = input => { 4 | const hashFunction = crypto.createHash('md5'); 5 | return hashFunction.update(input).digest('hex'); 6 | }; 7 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/utils/docs.js: -------------------------------------------------------------------------------- 1 | import aggParamDocByAggId from '$lib/elasticsearch/config/aggParamDocByAggId.js'; 2 | 3 | export const getAggDocs = agg => aggParamDocByAggId[agg]?.__docs; 4 | -------------------------------------------------------------------------------- /specs/responses/examples/max/query.txt: -------------------------------------------------------------------------------- 1 | /builder?q=(dataset:eurito_crunchbase_v1,form:primary,forms:!((name:primary,selection:(aggregation:max,field:cost_of_funding,type:float)),(name:secondary)),size:0) 2 | -------------------------------------------------------------------------------- /specs/responses/examples/min/query.txt: -------------------------------------------------------------------------------- 1 | /builder?q=(dataset:eurito_crunchbase_v1,form:primary,forms:!((name:primary,selection:(aggregation:min,field:cost_of_funding,type:float)),(name:secondary)),size:0) 2 | -------------------------------------------------------------------------------- /specs/responses/examples/stats/query.txt: -------------------------------------------------------------------------------- 1 | 
/builder?q=(dataset:mosaic_nih_v7,form:primary,forms:!((name:primary,selection:(aggregation:stats,field:cost_total_project,type:integer)),(name:secondary)),size:0) 2 | -------------------------------------------------------------------------------- /specs/responses/examples/weighted_avg/query.txt: -------------------------------------------------------------------------------- 1 | /builder?q=(dataset:mosaic_nih_v7,form:primary,forms:!((name:primary,selection:(aggregation:weighted_avg,field:cost_total_project,type:integer))),size:0) 2 | -------------------------------------------------------------------------------- /specs/responses/examples/missing/query.txt: -------------------------------------------------------------------------------- 1 | /builder?q=(dataset:mosaic_nih_v7,form:primary,forms:!((name:primary,selection:(aggregation:missing,field:cost_total_project,type:integer)),(name:secondary)),size:0) 2 | -------------------------------------------------------------------------------- /specs/responses/examples/stats/response.json: -------------------------------------------------------------------------------- 1 | { 2 | "primary": { 3 | "count": 796084, 4 | "min": 1, 5 | "max": 2765946854, 6 | "avg": 741707.0497661051, 7 | "sum": 590461115006 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /be/README.md: -------------------------------------------------------------------------------- 1 | # dapsboard cache layer 2 | 3 | To run, first cd inside this directory (the `be` dir) and run 4 | 5 | ``` 6 | docker compose up 7 | ``` 8 | 9 | This will expose the server on port `4000`. 
10 | -------------------------------------------------------------------------------- /specs/responses/examples/geo_bounds/query.txt: -------------------------------------------------------------------------------- 1 | /builder?q=(dataset:eurito_crunchbase_v0,form:primary,forms:!((name:primary,selection:(aggregation:geo_bounds,field:coordinate_of_city,type:geoPointObject))),size:0) 2 | -------------------------------------------------------------------------------- /specs/responses/examples/geo_centroid/response.json: -------------------------------------------------------------------------------- 1 | { 2 | "primary": { 3 | "location": { 4 | "lat": 35.873757200922626, 5 | "lon": -35.96746732692259 6 | }, 7 | "count": 545302 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /specs/responses/examples/cardinality/query.txt: -------------------------------------------------------------------------------- 1 | /builder?q=(dataset:mosaic_nih_v7,form:primary,forms:!((name:primary,selection:(aggregation:cardinality,field:id_isoNumeric_country,type:integer)),(name:secondary)),size:0) 2 | -------------------------------------------------------------------------------- /specs/responses/examples/percentiles/query.txt: -------------------------------------------------------------------------------- 1 | /builder?q=(dataset:mosaic_nih_v7,form:primary,forms:!((name:primary,selection:(aggregation:percentiles,field:cost_total_project,type:integer)),(name:secondary)),size:0) 2 | -------------------------------------------------------------------------------- /specs/responses/examples/extended_stats/query.txt: -------------------------------------------------------------------------------- 1 | /builder?q=(dataset:mosaic_nih_v7,form:primary,forms:!((name:primary,selection:(aggregation:extended_stats,field:cost_total_project,type:integer)),(name:secondary)),size:0) 2 | 
-------------------------------------------------------------------------------- /specs/responses/examples/significant_text/query.txt: -------------------------------------------------------------------------------- 1 | /builder?q=(dataset:mosaic_nih_v7,form:primary,forms:!((name:primary,selection:(aggregation:significant_text,field:placeName_continent_organisation)),(name:secondary)),size:0) 2 | -------------------------------------------------------------------------------- /fe/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .env 3 | .env.* 4 | !.env.example 5 | /.svelte-kit 6 | /build 7 | /node_modules 8 | /package 9 | /src/lib/app/data 10 | /static/dsl/ 11 | vite.config.js.timestamp-* 12 | vite.config.ts.timestamp-* 13 | -------------------------------------------------------------------------------- /fe/src/lib/app/stores/auth.js: -------------------------------------------------------------------------------- 1 | import {writable} from 'svelte/store'; 2 | 3 | export const _credentials = writable(); 4 | 5 | export const _isAuthenticated = writable(false); 6 | 7 | export const _isAuthModalOpen = writable(false); 8 | -------------------------------------------------------------------------------- /specs/responses/examples/geo_centroid/query.txt: -------------------------------------------------------------------------------- 1 | /builder?q=(dataset:eurito_crunchbase_v0,form:primary,forms:!((name:primary,selection:(aggregation:geo_centroid,field:coordinate_of_city,type:geoPointObject)),(name:secondary)),size:0) 2 | -------------------------------------------------------------------------------- /specs/responses/examples/histogram/query.txt: -------------------------------------------------------------------------------- 1 | 
/builder?q=(dataset:mosaic_nih_v7,form:primary,forms:!((name:primary,params:(interval:1),selection:(aggregation:histogram,field:year_fiscal_funding,type:integer)),(name:secondary)),size:0) 2 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/explore/route.context.js: -------------------------------------------------------------------------------- 1 | import {writable} from 'svelte/store'; 2 | 3 | export const createRouteStores = () => ({ 4 | dataset: writable({}), 5 | queryURL: writable(''), 6 | _neededFields: writable([]), 7 | }); 8 | -------------------------------------------------------------------------------- /specs/responses/examples/value_count/query.txt: -------------------------------------------------------------------------------- 1 | /builder?q=(dataset:mosaic_nih_v7,form:primary,forms:!((name:primary,selection:(aggregation:value_count,field:placeName_country_organisation,type:textWithKeyword)),(name:secondary)),size:0) 2 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/children.todo.js: -------------------------------------------------------------------------------- 1 | // filter agg 2 | // https://www.elastic.co/guide/en/elasticsearch/reference/7.9/search-aggregations-bucket-children-aggregation.html 3 | 4 | // children: { 5 | // type: string 6 | // }, 7 | -------------------------------------------------------------------------------- /specs/responses/examples/date_histogram/query.txt: -------------------------------------------------------------------------------- 1 | /builder?q=(dataset:arxlive_arxiv_v4,form:primary,forms:!((name:primary,params:(interval:%271y%27),selection:(aggregation:date_histogram,field:date_created_article,type:date)),(name:secondary)),size:0) 2 | -------------------------------------------------------------------------------- /specs/responses/examples/terms/query.txt: 
-------------------------------------------------------------------------------- 1 | /builder?q=(dataset:mosaic_nih_v7,form:primary,forms:!((name:primary,params:(missing:%27%27,size:20),selection:(aggregation:terms,field:terms_mesh_abstract,type:textWithKeywordArray)),(name:secondary)),size:0) 2 | -------------------------------------------------------------------------------- /fe/src/lib/utils/svizzle/utils/[any-any]-[any-any].js: -------------------------------------------------------------------------------- 1 | /** 2 | * Applies a function only if the argument is truthy. 3 | * @param {function} func The function. 4 | * @param {*} arg The argument. 5 | */ 6 | export const safeApply = func => arg => arg && func(arg); 7 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/explore/README.md: -------------------------------------------------------------------------------- 1 | - Visualisers: 2 | - old: https://xstate.js.org/viz/?gist=2831cffd35e9962b1fbaddb3a2a90a7b 3 | - new: https://stately.ai/viz/92dcf5ca-1337-4b3d-9aeb-dcbb100666c0 4 | - To copy this machine to the clipboard: `npm run copy_explore_route` 5 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/builder/tester.js: -------------------------------------------------------------------------------- 1 | import { Machine } from 'xstate'; 2 | import { builderTesterConfig } from './tester.config.js'; 3 | import { builderTesterOptions } from './tester.options.js'; 4 | 5 | export const BuilderTestMachine = Machine(builderTesterConfig, builderTesterOptions); 6 | -------------------------------------------------------------------------------- /fe/src/lib/app/utils/events.js: -------------------------------------------------------------------------------- 1 | export const makeHandlerKeyAdapter = keys => handler => (...args) => event => { 2 | if (keys.includes(event.key)) { 3 | event.preventDefault(); 4 | 
handler(...args); 5 | } 6 | } 7 | 8 | export const makeStandardKeyAdapter = makeHandlerKeyAdapter(['Enter', ' ']); 9 | -------------------------------------------------------------------------------- /specs/responses/examples/geo_bounds/response.json: -------------------------------------------------------------------------------- 1 | { 2 | "primary": { 3 | "bounds": { 4 | "top_left": { 5 | "lat": 70.80294397193938, 6 | "lon": -170.82805601879954 7 | }, 8 | "bottom_right": { 9 | "lat": -53.16254503559321, 10 | "lon": 178.44216596335173 11 | } 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/explore/viz/displays/Location.svelte: -------------------------------------------------------------------------------- 1 | 4 | 5 |
    6 | lat: {location.lat}, lon: {location.lon} 7 |
    8 | 9 | 15 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/filters.todo.js: -------------------------------------------------------------------------------- 1 | // filter agg 2 | // /search-aggregations-bucket-filters-aggregation.html 3 | 4 | // filters: { 5 | // other_bucket: optional(booleanD(false, true)), // TODO not explicitly stated in the docs, must test 6 | // other_bucket_key: optional(stringD('_other_')), 7 | // filters: arrayOf(object) 8 | // }, 9 | -------------------------------------------------------------------------------- /fe/src/lib/utils/svizzle/utils/[any-boolean]-[array-boolean].js: -------------------------------------------------------------------------------- 1 | /** 2 | * @module @svizzle/utils/[any-boolean]-[array-boolean] 3 | */ 4 | 5 | import * as _ from 'lamb'; 6 | import {isNotNil} from '@svizzle/utils'; 7 | 8 | import {ψ} from './array-[any-any].js'; 9 | 10 | export const occursWith = predicate => ψ(_.findWhere(predicate), isNotNil); 11 | -------------------------------------------------------------------------------- /specs/responses/examples/percentiles/response.json: -------------------------------------------------------------------------------- 1 | { 2 | "primary": { 3 | "values": { 4 | "1.0": 5999.9276611275045, 5 | "5.0": 23167.228213570834, 6 | "25.0": 106360.53983267206, 7 | "50.0": 229029.65303158798, 8 | "75.0": 648734.3596568941, 9 | "95.0": 2568676.9023536285, 10 | "99.0": 7620599.531014212 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/response/docCount.js: -------------------------------------------------------------------------------- 1 | import {integer} from '$lib/types/index.js'; 2 | 3 | export default { 4 | id: 'doc_count', 5 | doc: { 6 | doc_count: 'The amount of documents.', 7 | }, 8 | docLong: {}, 9 | shape: { 10 | doc_count: integer 11 | }, 12 | 
tag: 'single-value', 13 | } 14 | 15 | /* 16 | { 17 | "doc_count": 357361 18 | } 19 | */ 20 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/test/TODO.md: -------------------------------------------------------------------------------- 1 | - all aggs 2 | - have `id` === file name 3 | - have `availability` and at least `availability.from` 4 | - have `response` 5 | - have `request` 6 | - have `requestDoc` 7 | - `requestDoc` have all the paths on `request` 8 | - have `tag` being one of `metric`, `bucketing` 9 | - all responses 10 | - have `id` 11 | - have `tag` 12 | -------------------------------------------------------------------------------- /be/src/conf.js: -------------------------------------------------------------------------------- 1 | export const CROSS_ORIGIN_DOMAINS = [ 2 | /.*dapsboard-dev\.netlify\.app/u, 3 | /.*dapsboard-staging\.netlify\.app/u 4 | ]; 5 | 6 | export const PROTECTED_DATASETS = [ 7 | 'epc_with_hp_install_dates', 8 | 'processed_mcs_installations', 9 | 'hpmt_gold_interim_v0', 10 | 'hpmt_gold_interim_v1', 11 | 'hpmt_gold_interim_v2', 12 | 'hpmt_gold_interim_v3' 13 | ]; 14 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/explore/results.context.js: -------------------------------------------------------------------------------- 1 | import {writable} from 'svelte/store'; 2 | 3 | export const createResultsStores = () => ({ 4 | cachedResults: writable({}), 5 | cacheKey: writable(), 6 | currentResult: writable(), 7 | nextAggsHierarchy: writable({}), 8 | queriedHierarchy: writable({}), 9 | queuedAggs: writable({}), 10 | queueResults: writable([]), 11 | }); 12 | -------------------------------------------------------------------------------- /fe/src/lib/utils/svizzle/utils/obj-[string-any].js: -------------------------------------------------------------------------------- 1 | /** 2 | * @module @svizzle/utils/obj-[string-any] 
3 | */ 4 | 5 | /** 6 | * Curried function that retrieves the value of `key` in `obj``, 7 | * but only if the latter is truthy. 8 | * @param {*} obj The object to retrieve from. 9 | * @param {string} key The key to look for. 10 | */ 11 | export const safeGetKeyOf = obj => key => obj && obj[key]; 12 | -------------------------------------------------------------------------------- /fe/vite.config.js: -------------------------------------------------------------------------------- 1 | // eslint-disable-next-line import/no-unresolved 2 | import {sveltekit} from '@sveltejs/kit/vite'; 3 | 4 | /** @type {import('vite').UserConfig} */ 5 | const config = { 6 | plugins: [ 7 | sveltekit() 8 | ], 9 | server: { 10 | fs: { 11 | // Allow serving files from one level up to the project root 12 | allow: ['..'] 13 | } 14 | } 15 | }; 16 | 17 | export default config; 18 | -------------------------------------------------------------------------------- /fe/src/lib/utils/ids.js: -------------------------------------------------------------------------------- 1 | /* 2 | This file is imported by `src/bin/make_data.js` 3 | - we can't import files generated by it like `data/datasets.json` in here 4 | - we can't import @svizzle/ui as its index exports `.svelte` files 5 | */ 6 | 7 | import {isClientSide} from '@svizzle/ui'; 8 | 9 | let lastId = 0; 10 | export function autoID () { 11 | return `${isClientSide ? 
'client' : 'server'}${lastId++}`; 12 | } 13 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/builder/form.js: -------------------------------------------------------------------------------- 1 | import { createMachina } from '../utils.js'; 2 | import { Machine } from 'xstate'; 3 | 4 | import { formConfig } from './form.config.js'; 5 | import { formOptions } from './form.options.js'; 6 | 7 | export const createFormMachine = ctx => createMachina( 8 | formConfig, 9 | formOptions, 10 | ctx 11 | ); 12 | 13 | export const formTemplate = Machine(formConfig, formOptions); 14 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/explore/viz/displays/ValueDisplay.svelte: -------------------------------------------------------------------------------- 1 | 5 | 6 |
    7 | {#if label} 8 |
    {label}
    9 | {/if} 10 |
    {value}
    11 |
    12 | 13 | 22 | -------------------------------------------------------------------------------- /fe/static/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "background_color": "#ffffff", 3 | "theme_color": "#333333", 4 | "name": "Dapsboard", 5 | "short_name": "Dapsboard", 6 | "display": "minimal-ui", 7 | "start_url": "/", 8 | "icons": [ 9 | { 10 | "src": "logo-192.png", 11 | "sizes": "192x192", 12 | "type": "image/png" 13 | }, 14 | { 15 | "src": "logo-512.png", 16 | "sizes": "512x512", 17 | "type": "image/png" 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /fe/src/lib/env.js: -------------------------------------------------------------------------------- 1 | export const isDev = import.meta.env?.DEV || true; 2 | 3 | /* cache */ 4 | 5 | const cacheEnv = import.meta.env?.VITE_CACHE_ENV || 'dev'; // local, dev, prod 6 | const cacheURLs = { 7 | dev: 'https://dapsboard.cache.dev.dap-tools.uk', 8 | local: 'http://localhost:4000', 9 | staging: 'https://dapsboard.cache.staging.dap-tools.uk' 10 | }; 11 | export const selectedCacheURL = cacheURLs[cacheEnv]; 12 | export const useCache = true; 13 | -------------------------------------------------------------------------------- /specs/responses/examples/extended_stats/response.json: -------------------------------------------------------------------------------- 1 | { 2 | "primary": { 3 | "count": 796084, 4 | "min": 1, 5 | "max": 2765946854, 6 | "avg": 741707.0497661051, 7 | "sum": 590461115006, 8 | "sum_of_squares": 15295081725221380000, 9 | "variance": 18662769950920.6, 10 | "std_deviation": 4320042.818181389, 11 | "std_deviation_bounds": { 12 | "upper": 9381792.686128883, 13 | "lower": -7898378.586596673 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 
1 | { 2 | "description": "Dapsboard web app", 3 | "devDependencies": { 4 | "@babel/eslint-parser": "^7.19.1", 5 | "@babel/plugin-syntax-import-assertions": "^7.20.0", 6 | "eslint": "^8.32.0", 7 | "eslint-plugin-import": "^2.27.5", 8 | "eslint-plugin-node": "^11.1.0", 9 | "husky": "^4.3.0" 10 | }, 11 | "license": "MIT", 12 | "name": "dapsboard_app", 13 | "repository": "github:nestauk/dapsboard", 14 | "version": "0.0.8" 15 | } 16 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/utils/suggestions.js: -------------------------------------------------------------------------------- 1 | import * as _ from 'lamb'; 2 | import {isIterableEmpty} from '@svizzle/utils'; 3 | 4 | export const makeGetSuggestionsBy = searchQuery => _.pipe([ 5 | _.filterWith(sugg => sugg.key !== searchQuery), 6 | _.sortWith([ 7 | _.sorterDesc(_.getKey('doc_count')), 8 | _.getKey('key') 9 | ]), 10 | _.mapWith(sugg => `${sugg.key} (${sugg.doc_count})`), 11 | _.when(isIterableEmpty, _.always(['-- no suggestions found --'])) 12 | ]); 13 | -------------------------------------------------------------------------------- /fe/src/app.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | %sveltekit.head% 11 | 12 | 13 |
    %sveltekit.body%
    14 | 15 | 16 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/adjacency_matrix.todo.js: -------------------------------------------------------------------------------- 1 | // multi-field 2 | // 7.10: no params table 3 | // https://www.elastic.co/guide/en/elasticsearch/reference/7.9/search-aggregations-bucket-adjacency-matrix-aggregation.html 4 | 5 | // adjacency_matrix: { 6 | // filters: recordLike({ 7 | // values: recordLike({ 8 | // keys: enumsOf(['aggId']), 9 | // values: recordLike({ 10 | // keys: enumsOf(['field']), 11 | // values: arrayOf(string) 12 | // }) 13 | // }) 14 | // }), 15 | // separator: string 16 | // }, 17 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/explore/history.config.js: -------------------------------------------------------------------------------- 1 | export const historyConfig = { 2 | id: 'History', 3 | initial: 'Idle', 4 | states: { 5 | Idle: { 6 | on: { 7 | RESULTS_UPDATED: { 8 | target: 'Idle', 9 | actions: [ 10 | 'updateCurrentURL', 11 | 'sendCurrentUrlUpdated', 12 | 'conditionalLog' 13 | ] 14 | }, 15 | CURRENT_URL_UPDATED: { 16 | target: 'Idle', 17 | actions: [ 18 | 'updateEntry', 19 | 'conditionalLog' 20 | ] 21 | } 22 | }, 23 | }, 24 | } 25 | }; 26 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/builder/history.config.js: -------------------------------------------------------------------------------- 1 | export const historyConfig = { 2 | initial: 'Saved', 3 | states: { 4 | Saved: { 5 | on: { 6 | EDITED: { 7 | target: 'Editing', 8 | actions: ['initEntry'] 9 | } 10 | } 11 | }, 12 | Editing: { 13 | on: { 14 | EDITED: { 15 | target: 'Editing', 16 | actions: ['updateEntry'] 17 | }, 18 | COMMITTED: { 19 | target: 'Saved', 20 | cond: 'isActiveForm', 21 | actions: ['commitLastGoodURL'] 22 | } 23 | } 24 | }, 25 | } 26 | }; 27 | 
-------------------------------------------------------------------------------- /fe/src/lib/app/components/explore/viz/displays/ObjectDisplay.svelte: -------------------------------------------------------------------------------- 1 | 11 | 12 |
    13 |
    {label}
    14 | {#each pairs as [key, value]} 15 | {#if !isNaN(value)} 16 | 17 | {/if} 18 | {/each} 19 |
    20 | 21 | 26 | -------------------------------------------------------------------------------- /fe/src/lib/app/utils/data.js: -------------------------------------------------------------------------------- 1 | import * as _ from 'lamb'; 2 | import {isKeyValue} from '@svizzle/utils'; 3 | 4 | import DATASETS from '../data/datasets.json' assert {type: 'json'}; 5 | import {getApiVersion, getDatasetIdOf, getSchema} from '../../utils/specs.js'; 6 | 7 | export const findDatasetById = id => DATASETS.find(isKeyValue(['id', id])); 8 | export const getDatasetOf = _.pipe([getDatasetIdOf, findDatasetById]); 9 | export const getSchemaOf = _.pipe([getDatasetOf, getSchema]); 10 | export const getApiVersionOf = _.pipe([getDatasetOf, getApiVersion]); 11 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/builder/docs.config.js: -------------------------------------------------------------------------------- 1 | export const docsConfig = { 2 | initial: 'Idle', 3 | states: { 4 | Idle: { 5 | on: { 6 | FIELD_DOC_SHOWN: { 7 | target: 'Idle', 8 | actions: ['setFieldDoc'] 9 | }, 10 | FIELD_DOC_DEFAULT: { 11 | target: 'Idle', 12 | actions: ['resetFieldDoc'] 13 | }, 14 | AGG_DOC_SHOWN: { 15 | target: 'Idle', 16 | actions: ['setAggDoc'] 17 | }, 18 | AGG_DOC_DEFAULT: { 19 | target: 'Idle', 20 | actions: ['resetAggDoc'] 21 | }, 22 | } 23 | } 24 | } 25 | }; 26 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/builder/README.md: -------------------------------------------------------------------------------- 1 | ## builder route 2 | 3 | - Visualisers: 4 | - old: https://xstate.js.org/viz/?gist=b2abeff6fdff27087c8e2c5ad75d83f0 5 | - new: https://stately.ai/viz/62acf9a5-98dc-43af-a337-d4540dfd15e4 6 | - To copy this machine to the clipboard: `npm run copy_builder_route` 7 | 8 | ## builder form 9 | 10 | - Visualisers: 11 | - old: 
import { MongoClient } from 'mongodb';

const { MONGO_ROOT_USER: user, MONGO_ROOT_PASSWORD: password } = process.env;

// Credentials are percent-encoded so that reserved characters
// (`:`, `/`, `@`, ...) cannot break the connection string.
const uri = `mongodb://${encodeURIComponent(user)}:${encodeURIComponent(password)}@mongo`;
const client = new MongoClient(uri);

const database = client.db('dapsboard');
export const cache = database.collection('cache');

/**
 * Stores (or refreshes) the aggregation computed for a request in the `cache`
 * collection, using the request hash as the document id.
 *
 * @param {object} request - Object providing `hash`, `url` and `body`.
 * @param {*} aggregation - The aggregation result to cache.
 * @returns {Promise} The promise of the upsert, so that callers can await it
 * and handle failures.
 */
export const cacheRequest = (request, aggregation) => {
	const fresh = {
		_id: request.hash,
		url: request.url,
		body: request.body,
		aggregation
	};

	return cache.updateOne({ _id: request.hash }, { $set: fresh }, { upsert: true });
}
{stringify} from '@svizzle/utils'; 2 | import clip from 'clipboardy'; 3 | 4 | import {exploreConfig} from '$lib/app/machines/explore/route.config.js'; 5 | import {exploreOptions} from '$lib/app/machines/explore/route.js'; 6 | import {stringifyObj} from '$lib/utils/svizzle/utils/obj-string.js'; 7 | 8 | // eslint-disable-next-line no-empty-function 9 | clip.write(`${stringify(exploreConfig)}, ${stringifyObj(exploreOptions, () => {})}`); 10 | console.log('/explore route copied to the clipboard\n'); 11 | 12 | // see https://xstate.js.org/viz/?gist=972d49cd6a94ea17938e67390c8dd6af 13 | -------------------------------------------------------------------------------- /fe/jsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./.svelte-kit/tsconfig.json", 3 | "compilerOptions": { 4 | "allowJs": true, 5 | "checkJs": false, 6 | "esModuleInterop": true, 7 | "forceConsistentCasingInFileNames": true, 8 | "resolveJsonModule": true, 9 | "skipLibCheck": true, 10 | "sourceMap": true, 11 | "strict": true 12 | } 13 | // Path aliases are handled by https://kit.svelte.dev/docs/configuration#alias and https://kit.svelte.dev/docs/configuration#files 14 | // 15 | // If you want to overwrite includes/excludes, make sure to copy over the relevant includes/excludes 16 | // from the referenced tsconfig.json - TypeScript does not merge them in 17 | } 18 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/explore/search.context.js: -------------------------------------------------------------------------------- 1 | import {writable} from 'svelte/store'; 2 | 3 | export const createBaseSearchStores = () => ({ 4 | fieldStats: writable([]), 5 | isFieldsMenuActive: writable(false), 6 | suggestions: writable([]), 7 | searchQuery: writable(''), 8 | selectedFieldName: writable(), 9 | nextFieldNames: writable([]), 10 | prevFieldNames: writable([]), 11 | userSelection: writable(), 12 | 
}); 13 | 14 | export const createSearchStores = () => ({ 15 | ...createBaseSearchStores(), 16 | statsCache: writable({}), 17 | statsCacheKey: writable(), 18 | suggestionsCache: writable({}), 19 | suggestionsCacheKey: writable(), 20 | }); 21 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/types/aggs.utils.js: -------------------------------------------------------------------------------- 1 | import * as _ from 'lamb'; 2 | import {isObjEmpty} from '@svizzle/utils'; 3 | 4 | import {hasNoDefault, nativeKey, shapeKey} from '$lib/types/index.js'; 5 | import {is_required} from '$lib/elasticsearch/types/params.utils.js'; 6 | 7 | // agg.request -> boolean 8 | export const aggHasNoRequiredParams = _.pipe([ 9 | _.skip([shapeKey, nativeKey, 'field']), 10 | _.pickIf(is_required), 11 | isObjEmpty 12 | ]); 13 | 14 | // agg.request -> boolean 15 | export const aggHasNoRequiredParamsWithoutDefault = _.pipe([ 16 | _.skip([shapeKey, nativeKey, 'field']), 17 | _.pickIf(_.allOf([is_required, hasNoDefault])), 18 | isObjEmpty 19 | ]); 20 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/response/bucketsDocCount.js: -------------------------------------------------------------------------------- 1 | import { 2 | arrayOf, 3 | integer, 4 | number, 5 | objectOf 6 | } from '$lib/types/index.js'; 7 | 8 | export default { 9 | id: 'buckets_number', 10 | doc: { 11 | key: 'The first value of the bucket extent.', 12 | doc_count: 'The amount of documents in a bucket.', 13 | }, 14 | shape: { 15 | buckets: arrayOf(objectOf({ 16 | doc_count: integer, 17 | key: number, 18 | })) 19 | }, 20 | tag: 'multi-bucket', 21 | } 22 | 23 | /* 24 | { 25 | "buckets": [ 26 | { 27 | "doc_count": 41431, 28 | "key": 1985 29 | }, 30 | { 31 | "doc_count": 29086, 32 | "key": 1986 33 | } 34 | ] 35 | } 36 | */ 37 | -------------------------------------------------------------------------------- 
import {optional} from '$lib/elasticsearch/types/params.js';
import {number, string} from '$lib/types/index.js';

// Response spec of the aggregations returning a single `value`,
// optionally paired with its stringified form.
export default {
	id: 'value',
	doc: {
		value: 'The output value.',
		value_as_string: 'The stringified value (e.g. available for `esNumeric` types that can be converted to numbers, like dates).',
	},
	docLong: {},
	shape: {
		// a string (see the second example below), not a number
		value_as_string: optional(string),
		value: number,
	},
	tag: 'single-value',
}

/*
{
	"value": 741707.0497661051
}

{
	"value" : 1826248545820800000,
	"value_as_string" : "57873482-08-14"
}
*/
/**
 * Stringifies a Javascript object, including member functions.
 *
 * Functions are not representable in JSON, so each one is first serialised
 * as a placeholder string and then swapped with its source text.
 *
 * @param {*} obj The object to stringify.
 * @param {*} [dummy] If truthy, it is emitted in place of every function
 * found in `obj` (e.g. a no-op function to hide the implementations).
 * @returns {string|undefined} The 2-space indented string, or `undefined`
 * when `JSON.stringify` can't serialise `obj` (e.g. `obj` is `undefined`).
 */
export function stringifyObj (obj, dummy) {
	const placeholder = '____PLACEHOLDER____';
	const fns = [];

	const json = JSON.stringify(obj, (key, value) => {
		if (typeof value === 'function') {
			// queued in traversal order, consumed in the same order below
			fns.push(dummy || value);
			return placeholder;
		}
		return value;
	}, 2);

	// `JSON.stringify` returns `undefined` for non-serialisable roots:
	// mirror that instead of throwing on `.replace`
	if (json === undefined) {
		return undefined;
	}

	return json.replace(
		new RegExp(`"${placeholder}"`, 'ug'),
		() => String(fns.shift())
	);
}
}, 18 | tag: 'multi-value', 19 | } 20 | 21 | /* 22 | { 23 | "location": { 24 | "lat": 35.873757200922626, 25 | "lon": -35.96746732692259 26 | }, 27 | "count": 545302 28 | } 29 | */ 30 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/nested.todo.js: -------------------------------------------------------------------------------- 1 | import {esSearchableField} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 2 | import {string} from '$lib/types/index.js'; 3 | 4 | export default { 5 | id: 'nested', 6 | availability: { 7 | from: '1.3' 8 | }, 9 | docPath: '/search-aggregations-bucket-nested-aggregation.html', 10 | docs: 'A special single bucket aggregation that enables aggregating nested documents.', 11 | fieldType: esSearchableField, 12 | label: 'Nested', 13 | request: { 14 | // TODO Check: no `field`, no `missing` 15 | path: string 16 | }, 17 | requestDoc: { 18 | path: 'Path of the nested documents within the top level documents.', 19 | }, 20 | subAggs: true, 21 | tag: 'bucketing', 22 | }; 23 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/response/geoBounds.js: -------------------------------------------------------------------------------- 1 | import {objectOf} from '$lib/types/index.js'; 2 | import {geoPointObject} from '$lib/elasticsearch/types/fields.js'; 3 | 4 | export default { 5 | id: 'geo_bounds', 6 | doc: { 7 | top_left: 'Top-left coordinate of the bounds.', 8 | bottom_right: 'Bottom-right coordinate of the bounds.', 9 | }, 10 | shape: { 11 | bounds: objectOf({ 12 | bottom_right: geoPointObject, 13 | top_left: geoPointObject, 14 | }) 15 | }, 16 | // tag: 'multi-value', ? 
TODO 17 | } 18 | 19 | /* 20 | { 21 | "bounds": { 22 | "bottom_right": { 23 | "lat": 40.715, 24 | "lon": -73.983 25 | }, 26 | "top_left": { 27 | "lat": 40.722, 28 | "lon": -74.011 29 | } 30 | } 31 | } 32 | */ 33 | -------------------------------------------------------------------------------- /be/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | services: 3 | api: 4 | image: node:18-alpine 5 | command: sh -c "npm install && npm run dev" 6 | working_dir: /server 7 | volumes: 8 | - .:/server 9 | ports: 10 | - 4000:4000 11 | environment: 12 | - PORT=4000 13 | - AWS_ACCESS_KEY_ID 14 | - AWS_SECRET_ACCESS_KEY 15 | - AWS_DEFAULT_REGION 16 | - MONGO_ROOT_USER 17 | - MONGO_ROOT_PASSWORD 18 | mongo: 19 | image: "mongo" 20 | ports: 21 | - 27017:27017 22 | volumes: 23 | - mongodb:/data/db 24 | environment: 25 | - MONGO_INITDB_ROOT_USERNAME=${MONGO_ROOT_USER} 26 | - MONGO_INITDB_ROOT_PASSWORD=${MONGO_ROOT_PASSWORD} 27 | 28 | volumes: 29 | mongodb: -------------------------------------------------------------------------------- /githooks/post-receive: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Change this to staging on the staging server 4 | branch="dev" 5 | 6 | while read oldrev newrev ref 7 | do 8 | if [[ $ref =~ .*/$branch$ ]]; then 9 | echo "$branch ref received. Deploying $branch branch..." 10 | git --work-tree=$HOME/dapsboard --git-dir=$HOME/dapsboard.git checkout $branch -f 11 | cd $HOME/dapsboard/be 12 | export MONGO_ROOT_USER= 13 | export MONGO_ROOT_PASSWORD= 14 | docker compose down 15 | docker compose build 16 | docker compose up -d 17 | else 18 | echo "Ref $ref successfully received. Doing nothing: only the $branch branch may be deployed on this server." 
19 | fi 20 | done -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 0.0.8 2 | 3 | - fe: use SvelteKit 4 | 5 | # 0.0.7 6 | 7 | - Moved the front-end to `fe/` 8 | 9 | # 0.0.6 10 | 11 | - Added type system based on TypeScript 12 | - Added form generation based on selected dataset, field and aggregation specs 13 | 14 | # 0.0.5 15 | 16 | - Add the aggregation builder (#21) 17 | 18 | # 0.0.4 19 | 20 | - Upgraded ESlint and some linting rules (#28) 21 | - Setup linting as a precommit hook (#24) 22 | - Add new datasets (#17, #22) 23 | - Use `eurito-dev` endpoint (#19) 24 | 25 | # 0.0.3 26 | 27 | - Add simple aggregations (#4) 28 | 29 | # 0.0.2 30 | 31 | - New spec format (#7, #11) 32 | - run data script automatically (#13) 33 | - fix linting (#15) 34 | 35 | # 0.0.1 36 | 37 | - Barebone app (#1, #2, #9) 38 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/bin/makeAggToResponseId.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | /* eslint-disable 4 | node/shebang 5 | */ 6 | 7 | import path from 'node:path'; 8 | import {fileURLToPath} from 'node:url'; 9 | 10 | import {tapMessage} from '@svizzle/dev'; 11 | import {saveExportedObj} from '@svizzle/file'; 12 | import * as _ from 'lamb'; 13 | 14 | import * as aggs from '../spec/index.js'; 15 | 16 | const __dirname = path.dirname(fileURLToPath(import.meta.url)); 17 | 18 | const PATH = path.resolve(__dirname, '../ref/aggToResponseType.js'); 19 | const makeMap = _.mapValuesWith(_.getPath('response.id')); 20 | 21 | const mapStr = makeMap(aggs); 22 | 23 | saveExportedObj(PATH, '\t')(mapStr) 24 | .then(tapMessage(`Saved ${PATH}`)) 25 | .catch(err => console.error(err)); 26 | -------------------------------------------------------------------------------- 
/**
 * Splits a dotted version string into its numeric parts.
 * e.g. '7.10' -> [7, 10]
 *
 * @param {string} version
 * @returns {number[]}
 */
const splitVersion = version => version.split('.').map(Number);

/**
 * Returns a predicate checking whether an aggregation is available in the
 * reference version, i.e. whether the reference lies within
 * `agg.availability.from` and (if defined) `agg.availability.to`, both
 * inclusive. Only the major and minor parts are compared; a missing minor
 * part (e.g. '7') is treated as 0.
 *
 * @param {string} refVerString The reference version, e.g. '7.9'.
 * @returns {function(object): boolean} Predicate expecting an aggregation
 * with `availability.from` and optionally `availability.to`.
 */
export const makeIsAggVersionCompatible = refVerString => {
	const [refMajor, refMinor = 0] = splitVersion(refVerString);

	return agg => {
		const {from, to} = agg.availability;
		const [fromMajor, fromMinor = 0] = splitVersion(from);

		const isFromOk =
			refMajor > fromMajor
			|| (refMajor === fromMajor && refMinor >= fromMinor);

		// no upper bound to check
		if (!isFromOk || !to) {
			return isFromOk;
		}

		const [toMajor, toMinor = 0] = splitVersion(to);

		return refMajor < toMajor
			|| (refMajor === toMajor && refMinor <= toMinor);
	}
}
-------------------------------------------------------------------------------- 1 | import * as _ from 'lamb'; 2 | import {optionalKey} from '$lib/elasticsearch/types/params.js'; 3 | 4 | /* conditions */ 5 | 6 | export const is_optional = _.hasKey(optionalKey); 7 | export const is_required = _.not(is_optional); 8 | 9 | /* dimensional types */ 10 | 11 | /* TODO 12 | export const isValidInterval 13 | - check it ends with a string in interval.units 14 | - remove the unit 15 | - @svizzle/utils.isValidNumber 16 | 17 | export const isValidCalendarInterval 18 | - check it ends with a string in calendarInterval.units 19 | - remove the unit 20 | - @svizzle/utils.isValidNumber 21 | 22 | export const isValidFixedInterval 23 | - check it ends with a string in fixedInterval.units 24 | - remove the unit 25 | - @svizzle/utils.isValidNumber 26 | */ 27 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/elementary/SelectMenu.svelte: -------------------------------------------------------------------------------- 1 | 15 | 16 | 17 | 18 | 23 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/builder/route.js: -------------------------------------------------------------------------------- 1 | import { createMachina } from '../utils.js'; 2 | 3 | import { formEditingOptions } from './formediting.options.js'; 4 | import { docsOptions } from './docs.options.js'; 5 | import { historyOptions } from './history.options.js'; 6 | import { routeConfig } from './route.config.js'; 7 | import { createFormEditingStores } from './formediting.context.js'; 8 | 9 | export const options = { 10 | actions: { 11 | ...formEditingOptions.actions, 12 | ...docsOptions.actions, 13 | ...historyOptions.actions, 14 | }, 15 | guards: { 16 | ...formEditingOptions.guards, 17 | ...docsOptions.guards, 18 | ...historyOptions.guards, 19 | } 20 | }; 21 | 22 | export const createBuilderMachine = 
() => createMachina( 23 | routeConfig, 24 | options, 25 | createFormEditingStores() 26 | ); 27 | -------------------------------------------------------------------------------- /fe/src/routes/+error.svelte: -------------------------------------------------------------------------------- 1 | 7 | 8 | 9 | Dapsboard - {status} 10 | 14 | 15 | 16 |

    {status}

    17 | 18 |

    {error?.message || 'Message not defined'}

    19 | 20 | {#if isDev && error?.stack} 21 |
    {error?.stack}
    22 | {/if} 23 | 24 | 45 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/geo_centroid.js: -------------------------------------------------------------------------------- 1 | import {geoPointObject} from '$lib/elasticsearch/types/fields.js'; 2 | import {string} from '$lib/types/index.js'; 3 | import {field} from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 4 | import response from '$lib/elasticsearch/aggs/response/geoCentroid.js'; 5 | 6 | export default { 7 | id: 'geo_centroid', 8 | availability: { 9 | from: '2.1' 10 | }, 11 | docPath: '/search-aggregations-metrics-geocentroid-aggregation.html', 12 | docs: 'Computes the weighted centroid from all coordinate values for geo fields.', 13 | fieldType: geoPointObject, 14 | label: 'Geo Centroid', 15 | request: { // [0] 16 | field: string, 17 | }, 18 | requestDoc: { 19 | field, 20 | }, 21 | response, 22 | tag: 'metric', 23 | version: '7.9', 24 | }; 25 | 26 | // [0] 7.9: no params table 27 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/builder/route.config.js: -------------------------------------------------------------------------------- 1 | import { formEditingConfig } from './formediting.config.js'; 2 | import { docsConfig } from './docs.config.js'; 3 | import { historyConfig } from './history.config.js'; 4 | 5 | export const routeConfig = { 6 | id: 'BuilderRoute', 7 | initial: 'Loading', 8 | states: { 9 | Loading: { 10 | on: { 11 | READY: { 12 | target: '#BuilderRoute.Interactive', 13 | actions: [ 14 | 'resetForms', 15 | ] 16 | } 17 | } 18 | }, 19 | Interactive: { 20 | type: 'parallel', 21 | states: { 22 | Docs: docsConfig, 23 | FormEditing: formEditingConfig, 24 | History: historyConfig, 25 | }, 26 | on: { 27 | ROUTE_CHANGED: { 28 | target: '#BuilderRoute.Interactive', 29 | actions: [ 30 | 'resetForms', 31 | ] 32 | } 33 | } 34 | } 35 | } 36 | }; 37 | 
-------------------------------------------------------------------------------- /fe/src/lib/app/machines/builder/formediting.context.js: -------------------------------------------------------------------------------- 1 | import { writable } from 'svelte/store'; 2 | import { DEFAULT_FIELD_DOCS, DEFAULT_AGG_DOCS } from './docs.options.js'; 3 | 4 | export function createFormEditingStores () { 5 | return { 6 | // config 7 | hideDisabledForms: writable(true), 8 | hideDisabledAggregations: writable(false), 9 | hideDisabledDatasets: writable(false), 10 | hideDisabledFields: writable(true), 11 | runQueryOnSelect: writable(true), 12 | selectedRequestTab: writable('fields'), 13 | showFullResponse: writable(false), 14 | // doc strings 15 | activeDocs: writable(DEFAULT_FIELD_DOCS), 16 | aggDocText: writable(DEFAULT_AGG_DOCS), 17 | // builder stores 18 | dataset: writable(null), 19 | forms: writable([]), 20 | resultSize: writable(0), 21 | selectedForm: writable(), 22 | isParsing: writable(false), 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/response/bucketsGeohashGrid.js: -------------------------------------------------------------------------------- 1 | import { 2 | arrayOf, 3 | integer, 4 | objectOf, 5 | string 6 | } from '$lib/types/index.js'; 7 | 8 | export default { 9 | id: 'buckets_geohash_grid', 10 | doc: { 11 | doc_count: 'The amount of documents in a bucket.', 12 | key: 'The hash of a cell.', 13 | }, 14 | shape: { 15 | buckets: arrayOf(objectOf({ 16 | doc_count: integer, 17 | key: string, 18 | })) 19 | }, 20 | tag: 'multi-bucket', 21 | } 22 | 23 | /* 24 | { 25 | "buckets": [ 26 | { 27 | "doc_count": 3, 28 | "key": "u17" 29 | }, 30 | { 31 | "doc_count": 2, 32 | "key": "u09" 33 | } 34 | ] 35 | } 36 | 37 | High precision geohashes have a long string length and represent cells that cover only a small area. 
38 | Low precision geohashes have a short string length and represent cells that each cover a large area. 39 | */ 40 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/explore/history.options.js: -------------------------------------------------------------------------------- 1 | import {get} from 'svelte/store'; 2 | import {assign, send} from 'xstate'; 3 | 4 | import {makeExplorePath} from '$lib/app/utils/exploreUtils.js'; 5 | 6 | const updateCurrentURL = ctx => { 7 | console.log('ctx', ctx) 8 | const {project, source, version} = get(ctx.dataset); 9 | const fields = get(ctx.selectedFields); 10 | const neededFields = get(ctx._neededFields); 11 | const url = makeExplorePath({fields, neededFields, project, source, version}); 12 | ctx.currentURL.set(url); 13 | return ctx; 14 | } 15 | 16 | export const historyOptions = { 17 | actions: { 18 | updateCurrentURL: assign(updateCurrentURL), 19 | sendCurrentUrlUpdated: send('CURRENT_URL_UPDATED'), 20 | updateEntry: ctx => { 21 | globalThis.history && history.pushState(null, window.title, get(ctx.currentURL)); 22 | } 23 | }, 24 | guards: {} 25 | }; 26 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/sampler.todo.js: -------------------------------------------------------------------------------- 1 | import {esSearchableField} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 2 | import {optional} from '$lib/elasticsearch/types/params.js'; 3 | import {integerD} from '$lib/types/index.js'; 4 | 5 | export default { 6 | id: 'sampler', 7 | availability: { 8 | from: '2.0' 9 | }, 10 | docPath: '/search-aggregations-bucket-sampler-aggregation.html', 11 | docs: 'A filtering aggregation used to limit any sub aggregations processing to a sample of the top-scoring documents.', 12 | fieldType: esSearchableField, 13 | label: 'Sampler', 14 | request: { 15 | shard_size: optional(integerD(100)) 16 | }, 17 | requestDoc: 
{ 18 | shard_size: 'The shard_size parameter limits how many top-scoring documents are collected in the sample processed on each shard. The default value is 100.' 19 | }, 20 | subAggs: true, 21 | tag: 'bucketing', 22 | }; 23 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/response/boxplot.js: -------------------------------------------------------------------------------- 1 | import {integer, float, number} from '$lib/types/index.js'; 2 | 3 | export default { 4 | id: 'boxplot', 5 | doc: { 6 | count: 'The number of non-empty fields counted.', 7 | max: 'The largest data point excluding any outliers.', 8 | min: 'The lowest data point excluding any outliers.', 9 | q1: 'Also known as the lower quartile qn(0.25), is the median of the lower half of the dataset.', 10 | q2: 'The middle value of the dataset.', 11 | q3: 'Also known as the upper quartile qn(0.75), is the median of the upper half of the dataset.', 12 | }, 13 | shape: { 14 | count: integer, 15 | max: number, 16 | min: number, 17 | q1: float, 18 | q2: float, 19 | q3: float, 20 | }, 21 | tag: 'multi-value', 22 | } 23 | 24 | /* 25 | { 26 | "max": 990.0, 27 | "min": 0.0, 28 | "q1": 165.0, 29 | "q2": 445.0, 30 | "q3": 725.0 31 | } 32 | */ 33 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/response/bucketsNumberAuto.js: -------------------------------------------------------------------------------- 1 | import { 2 | arrayOf, 3 | integer, 4 | number, 5 | objectOf 6 | } from '$lib/types/index.js'; 7 | 8 | export default { 9 | id: 'buckets_number_auto', 10 | doc: { 11 | doc_count: 'The amount of documents in a bucket.', 12 | key: 'The first value of the bucket extent.', 13 | max: 'The maximum value of the bucket.', 14 | min: 'The minimum value of the bucket.', 15 | }, 16 | shape: { 17 | buckets: arrayOf(objectOf({ 18 | doc_count: integer, 19 | key: number, 20 | max: number, 21 | min: number, 
22 | })) 23 | }, 24 | tag: 'multi-bucket', 25 | } 26 | 27 | /* 28 | { 29 | "buckets": [ 30 | { 31 | "doc_count": 2, 32 | "key": 30.0, 33 | "max": 50.0, 34 | "min": 10.0 35 | }, 36 | { 37 | "doc_count": 5, 38 | "key": 185.0, 39 | "max": 200.0, 40 | "min": 150.0 41 | } 42 | ] 43 | } 44 | */ 45 | -------------------------------------------------------------------------------- /fe/src/bin/machines/builder_copy_form.js: -------------------------------------------------------------------------------- 1 | import {stringify} from '@svizzle/utils'; 2 | import clip from 'clipboardy'; 3 | 4 | import {stringifyObj} from '$lib/utils/svizzle/utils/obj-string.js'; 5 | 6 | import {formConfig} from '$lib/app/machines/builder/form.config.js'; 7 | import {builderTesterOptions} from '$lib/app/machines/builder/tester.options.js'; 8 | 9 | clip.write(`${stringify({ 10 | ...formConfig, 11 | context: { 12 | autoExecute: false, 13 | cached: false, 14 | hideDisabledAxes: true, 15 | hideDisabledAggs: false, 16 | hideDisabledDatasets: false, 17 | hideDisabledItems: true, 18 | matching: false, 19 | selectionComplete: false, 20 | showFullResponse: false, 21 | queryReady: false 22 | } 23 | })}, ${stringifyObj(builderTesterOptions)}`); 24 | console.log('/builder form copied to the clipboard\n'); 25 | 26 | // see https://xstate.js.org/viz/?gist=2c26ae02b853db5dd2e6377d123979f3 27 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/builder/docs.options.js: -------------------------------------------------------------------------------- 1 | export const DEFAULT_FIELD_DOCS = 'Click on a field for docs.'; 2 | export const DEFAULT_AGG_DOCS = 'Hover on an aggregation for short description or click on the outgoing link for full reference.'; 3 | 4 | export const docsOptions = { 5 | actions: { 6 | /** 7 | * Resets the agg doc string to the default text. 
8 | */ 9 | resetAggDoc: ctx => ctx.aggDocText.set(DEFAULT_AGG_DOCS), 10 | /** 11 | * Resets the field doc string to the default text. 12 | */ 13 | resetFieldDoc: ctx => ctx.activeDocs.set(DEFAULT_FIELD_DOCS), 14 | /** 15 | * Sets the agg doc string to be displayed in the UI 16 | */ 17 | setAggDoc: (ctx, {docstring}) => ctx.aggDocText.set(docstring), 18 | /** 19 | * Sets the field doc string to be displayed in the UI 20 | */ 21 | setFieldDoc: (ctx, {docstring}) => ctx.activeDocs.set(docstring), 22 | }, 23 | guards: { 24 | } 25 | }; 26 | -------------------------------------------------------------------------------- /.github/workflows/ci_cd.yml: -------------------------------------------------------------------------------- 1 | name: Push to remote 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'dev' 7 | - 'staging' 8 | 9 | jobs: 10 | push: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v3 15 | with: 16 | fetch-depth: 0 17 | - name: Push to remote server 18 | env: 19 | PRIVATE_KEY: ${{ secrets.BACKEND_SSH_PRIVATE_KEY }} 20 | BRANCH: ${{ github.ref_name }} 21 | run: | 22 | install -m 600 -D /dev/null ~/.ssh/private-key.pem 23 | echo "$PRIVATE_KEY" > ~/.ssh/private-key.pem 24 | ssh-keyscan -H dapsboard.cache.$BRANCH.dap-tools.uk > ~/.ssh/known_hosts 25 | git config core.sshCommand 'ssh -i ~/.ssh/private-key.pem' 26 | git remote add $BRANCH ubuntu@dapsboard.cache.$BRANCH.dap-tools.uk:/home/ubuntu/dapsboard.git 27 | git push $BRANCH --force -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/response/bucketsGeotileGrid.js: -------------------------------------------------------------------------------- 1 | import {arrayOf, integer, objectOf} from '$lib/types/index.js'; 2 | import {zoomXYString} from '$lib/elasticsearch/types/response.js'; 3 | 4 | export default { 5 | id: 'buckets_geotile_grid', 6 | doc: { 7 | doc_count: 'The amount of documents in a bucket.', 8 | key: 
'Each cell is labeled using a "{zoom}/{x}/{y}" format, where zoom is equal to the user-specified precision. See https://wiki.openstreetmap.org/wiki/Zoom_levels for zoom levels.', 9 | }, 10 | shape: { 11 | buckets: arrayOf(objectOf({ 12 | doc_count: integer, 13 | key: zoomXYString, 14 | })) 15 | }, 16 | tag: 'multi-bucket', 17 | } 18 | 19 | /* 20 | { 21 | "buckets": [ 22 | { 23 | "doc_count" : 3, 24 | "key" : "8/131/84" 25 | }, 26 | { 27 | "doc_count" : 2, 28 | "key" : "8/129/88" 29 | }, 30 | { 31 | "doc_count" : 1, 32 | "key" : "8/131/85" 33 | } 34 | ] 35 | } 36 | */ 37 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/types/aggs.utils.spec.js: -------------------------------------------------------------------------------- 1 | import assert from 'node:assert'; 2 | 3 | import {optional} from '$lib/elasticsearch/types/params.js'; 4 | import {integer, integerD} from '$lib/types/index.js'; 5 | 6 | import {aggHasNoRequiredParamsWithoutDefault} from './aggs.utils.js'; 7 | 8 | describe('elasticsearch/types/aggs.utils', function () { 9 | describe('aggHasNoRequiredParamsWithoutDefault', function () { 10 | it('only requireds with default', function () { 11 | const actual = aggHasNoRequiredParamsWithoutDefault({ 12 | opt: optional(integer), 13 | reqD: integerD(10), 14 | }); 15 | 16 | assert.deepStrictEqual(actual, true); 17 | }); 18 | it('requireds without default', function () { 19 | const actual = aggHasNoRequiredParamsWithoutDefault({ 20 | opt: optional(integer), 21 | reqD: integerD(10), 22 | reqNoD: integer, 23 | }); 24 | 25 | assert.deepStrictEqual(actual, false); 26 | }); 27 | }); 28 | }); 29 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/elementary/AutoComplete.svelte: -------------------------------------------------------------------------------- 1 | 18 | 19 | {#if active && visible.length > 0} 20 | 25 | {/if} 26 | 27 | 44 | 
-------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/response/bucketsTerms.js: -------------------------------------------------------------------------------- 1 | import { 2 | arrayOf, 3 | integer, 4 | objectOf, 5 | string, 6 | } from '$lib/types/index.js'; 7 | 8 | export default { 9 | id: 'buckets_terms', 10 | doc: { 11 | '{bucket}.doc_count': 'The amount of documents in a bucket.', 12 | '{bucket}.key': 'The bucket term.', 13 | doc_count_error_upper_bound: 'TODO', 14 | sum_other_doc_count: 'TODO', 15 | }, 16 | docLong: {}, 17 | shape: { 18 | buckets: arrayOf( 19 | objectOf({ 20 | doc_count: integer, 21 | key: string, 22 | }) 23 | ), 24 | doc_count_error_upper_bound: integer, 25 | sum_other_doc_count: integer, 26 | }, 27 | tag: 'multi-bucket', 28 | } 29 | 30 | /* 31 | { 32 | "doc_count_error_upper_bound": 5949, 33 | "sum_other_doc_count": 2704253, 34 | "buckets": [ 35 | { 36 | "doc_count": 115416, 37 | "key": "Humans" 38 | }, 39 | { 40 | "doc_count": 77388, 41 | "key": "Animals" 42 | }, 43 | ] 44 | } 45 | */ 46 | -------------------------------------------------------------------------------- /githooks/README.md: -------------------------------------------------------------------------------- 1 | # Creating the hook on the remote server 2 | 3 | Create a bare git repo: 4 | 5 | ```sh 6 | mkdir dapsboard.git 7 | cd dapsboard.git 8 | git init --bare 9 | ``` 10 | 11 | Create post-receive hook file 12 | 13 | ```sh 14 | cd hooks 15 | touch post-receive 16 | ``` 17 | 18 | Copy contents of [post-receive](./post-receive) to this file. 19 | 20 | Please make sure that you: 21 | - Update the `branch` variable to reflect which one the remote is, i.e. 
either 22 | `dev` or `staging` 23 | - Set the `MONGO_ROOT_USER` and `MONGO_ROOT_PASSWORD` variables in the script 24 | 25 | Then make executable: 26 | 27 | ``` 28 | sudo chmod +x post-receive 29 | ``` 30 | 31 | Clone the bare repo: 32 | 33 | ``` 34 | cd $HOME 35 | git clone dapsboard.git 36 | ``` 37 | 38 | Make sure that your user is part of the `docker` group: 39 | 40 | ```sh 41 | sudo groupadd docker 42 | sudo usermod -aG docker $USER 43 | ``` 44 | 45 | And you're done. The GH action will take care of the rest. -------------------------------------------------------------------------------- /fe/src/lib/app/components/explore/viz/displays/AggResultView.svelte: -------------------------------------------------------------------------------- 1 | 26 | 27 |
    28 | {#if isNotNil(title)} 29 |
    {title}
    30 | {/if} 31 |
    32 | {#if component} 33 | 34 | {/if} 35 |
    36 |
    37 | 38 | 47 | -------------------------------------------------------------------------------- /fe/src/lib/types/utils.js: -------------------------------------------------------------------------------- 1 | import * as _ from 'lamb'; 2 | 3 | import {occursWith} from '$lib/utils/svizzle/utils/[any-boolean]-[array-boolean].js'; 4 | import {ψ} from '$lib/utils/svizzle/utils/array-[any-any].js'; 5 | 6 | import { 7 | getNative, 8 | getShape, 9 | is_union, 10 | isNative, 11 | isShape, 12 | isShapeWithNative, 13 | } from './index.js'; 14 | 15 | export const isSameType = type => _.anyOf([ 16 | _.allOf([ 17 | isNative, 18 | ψ(getNative, _.is(getNative(type))) 19 | ]), 20 | _.allOf([ 21 | isShape, 22 | ψ(getShape, _.is(getShape(type))) 23 | ]), 24 | _.allOf([ 25 | isShapeWithNative, 26 | ψ(getShape, _.is(getShape(type))), 27 | ψ(getNative, _.is(getNative(type))) 28 | ]) 29 | ]); 30 | 31 | export const makeIsTypeInUnion = type => 32 | ψ(_.getKey('types'), occursWith(isSameType(type))); 33 | 34 | export const makeIsTypeCompatibleWithType = type => _.anyOf([ 35 | isSameType(type), 36 | _.allOf([is_union, makeIsTypeInUnion(type)]) 37 | ]); 38 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/missing.js: -------------------------------------------------------------------------------- 1 | import {esSearchableField} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 2 | import {string} from '$lib/types/index.js'; 3 | import {field} from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 4 | import response from '$lib/elasticsearch/aggs/response/docCount.js'; 5 | 6 | export default { 7 | id: 'missing', 8 | availability: { 9 | from: '1.3' 10 | }, 11 | docPath: '/search-aggregations-bucket-missing-aggregation.html', 12 | docs: 'A field data based single bucket aggregation, that creates a bucket of all documents in the current document set context that are missing a field value (effectively, missing a field or having 
the configured NULL value set).', 13 | fieldType: esSearchableField, 14 | label: 'Missing', 15 | request: {// [0] 16 | field: string, 17 | }, 18 | requestDoc: { 19 | field, 20 | }, 21 | response, 22 | subAggs: true, 23 | tag: 'bucketing', 24 | version: '7.9', 25 | }; 26 | 27 | // [0] 7.9: no params table 28 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/value_count.js: -------------------------------------------------------------------------------- 1 | import {esSearchableField} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 2 | import {optional, script} from '$lib/elasticsearch/types/params.js'; 3 | import {string} from '$lib/types/index.js'; 4 | import {field, script as scriptDoc} from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 5 | import response from '$lib/elasticsearch/aggs/response/value.js'; 6 | 7 | export default { 8 | id: 'value_count', 9 | availability: { 10 | from: '1.3' 11 | }, 12 | docPath: '/search-aggregations-metrics-valuecount-aggregation.html', 13 | docs: 'Counts the number of values that are extracted from the aggregated documents.', 14 | fieldType: esSearchableField, 15 | label: 'Value Count', 16 | request: { // [0] 17 | field: string, 18 | script: optional(script), 19 | }, 20 | requestDoc: { 21 | field, 22 | script: scriptDoc, 23 | }, 24 | response, 25 | tag: 'metric', 26 | version: '7.9', 27 | }; 28 | 29 | // [0] 7.9: no params table 30 | -------------------------------------------------------------------------------- /fe/src/lib/utils/generic.js: -------------------------------------------------------------------------------- 1 | /* 2 | This file is imported by `src/bin/make_data.js` 3 | - we can't import files generated by it like `data/datasets.json` in here 4 | - we can't import @svizzle/ui as its index exports `.svelte` files 5 | */ 6 | 7 | import * as _ from 'lamb'; 8 | import {getId, isFunction} from '@svizzle/utils'; 9 | 10 | export const indexById = 
_.indexBy(getId); 11 | 12 | // TODO -> evaluateIfFunction 13 | export const evaluate = type => _.when(isFunction, _.applyTo([type])); 14 | 15 | export const ifExistsGetKey = prop => _.casus(_.hasKey(prop), _.getKey(prop)); 16 | 17 | export const isKeyOf = _.curry(_.has); 18 | export const getKeyOf = _.curry(_.getIn); 19 | 20 | export const descentReducer = (hasChildren, getChildren) => { 21 | const reducer = _.curryable((reducerFn, init, node) => { 22 | const acc = reducerFn(init, node); 23 | if (!hasChildren(node)) { 24 | return acc; 25 | } 26 | return getChildren(node).reduce(reducer(reducerFn), acc); 27 | }); 28 | return reducer; 29 | } 30 | -------------------------------------------------------------------------------- /specs/responses/examples/significant_text/response.json: -------------------------------------------------------------------------------- 1 | { 2 | "primary": { 3 | "doc_count": 1153445, 4 | "bg_count": 4814301, 5 | "buckets": [ 6 | { 7 | "key": "North America", 8 | "doc_count": 503375, 9 | "score": 0.7828902342051492, 10 | "bg_count": 751990 11 | }, 12 | { 13 | "key": "Europe", 14 | "doc_count": 1247, 15 | "score": 0.0018404593616494103, 16 | "bg_count": 1926 17 | }, 18 | { 19 | "key": "Africa", 20 | "doc_count": 1371, 21 | "score": 0.0018383789135547142, 22 | "bg_count": 2247 23 | }, 24 | { 25 | "key": "Asia", 26 | "doc_count": 673, 27 | "score": 0.0010554951306756097, 28 | "bg_count": 1000 29 | }, 30 | { 31 | "key": "South America", 32 | "doc_count": 344, 33 | "score": 0.0005051579977119143, 34 | "bg_count": 533 35 | }, 36 | { 37 | "key": "Oceania", 38 | "doc_count": 311, 39 | "score": 0.00041798335359491114, 40 | "bg_count": 509 41 | } 42 | ] 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/builder/form.context.js: -------------------------------------------------------------------------------- 1 | import { writable } from 'svelte/store'; 2 | 3 | export function 
createFormStores () { 4 | return { 5 | // user editable properties 6 | selection: writable({ 7 | aggregation: null, 8 | field: null, 9 | type: null, 10 | }), 11 | params: writable({}), 12 | parsedQuery: writable(null), 13 | 14 | // computed select lists 15 | topBucketOptions: writable([]), 16 | bucketOptions: writable([]), 17 | bucketMultiFieldOptions: writable([]), 18 | nestedBucketOptions: writable([]), 19 | metricOptions: writable([]), 20 | metricMultiFieldOptions: writable([]), 21 | typeOptions: writable([]), 22 | datasetOptions: writable([]), 23 | fieldOptions: writable([]), 24 | 25 | // other computed properties 26 | aggParamsInfo: writable([]), 27 | computedQuery: writable({}), 28 | readyForRequest: writable(false), 29 | response: writable(null), 30 | responseStatus: writable({ 31 | error: false, 32 | matching: false, 33 | pending: false, 34 | }) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/response/numToNum.js: -------------------------------------------------------------------------------- 1 | import { 2 | arrayOf, 3 | number, 4 | numString, 5 | recordLike, 6 | unionOf 7 | } from '$lib/types/index.js'; 8 | 9 | export default { 10 | id: 'numkeyToNum', 11 | doc: { 12 | keys: 'Stringified numbers. E.g. percentiles thresholds.', 13 | values: 'Numbers. E.g. 
for percentiles, each number is the value at which N percent of the data (the numeric key) is below it.', 14 | key: 'The datapoint key.', 15 | value: 'The datapoint value.', 16 | }, 17 | shape: { 18 | values: unionOf( 19 | recordLike({ 20 | keys: numString, 21 | values: number, 22 | }), 23 | arrayOf({ 24 | key: number, 25 | value: number, 26 | }) 27 | ) 28 | }, 29 | tag: 'multi-value', 30 | } 31 | 32 | /* 33 | { 34 | "values": { 35 | "1.0": 5999.9276611275045, 36 | "5.0": 23167.228213570834 37 | } 38 | } 39 | 40 | { 41 | "values": [ 42 | { 43 | "key": 1.0, 44 | "value": 5.0 45 | }, 46 | { 47 | "key": 5.0, 48 | "value": 25.0 49 | } 50 | ] 51 | } 52 | */ 53 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/explore/selecting.config.js: -------------------------------------------------------------------------------- 1 | export const selectingConfig = { 2 | initial: 'Idle', 3 | states: { 4 | Idle: { 5 | entry: ['conditionalLog'], 6 | on: { 7 | SELECTED_FIELDS: { 8 | target: 'Idle', 9 | actions: [ 10 | 'selectFields', 11 | 'updateSelectionAggsHierarchy', 12 | 'sendAggsHierarchyUpdated' 13 | ], 14 | }, 15 | TOGGLED_FIELD_COUNTER: { 16 | target: 'Idle', 17 | actions: [ 18 | 'toggleField', 19 | 'updateSelectionAggsHierarchy', 20 | 'sendAggsHierarchyUpdated' 21 | ] 22 | }, 23 | SELECTED_NEXT_FIELD: { 24 | target: 'Idle', 25 | actions: [ 26 | 'selectNextField', 27 | 'updateSelectionAggsHierarchy', 28 | 'sendAggsHierarchyUpdated' 29 | ] 30 | }, 31 | SELECTED_PREVIOUS_FIELD: { 32 | target: 'Idle', 33 | actions: [ 34 | 'selectPreviousField', 35 | 'updateSelectionAggsHierarchy', 36 | 'sendAggsHierarchyUpdated' 37 | ] 38 | }, 39 | } 40 | } 41 | } 42 | }; 43 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/response/stringStats.js: -------------------------------------------------------------------------------- 1 | import {optional} from 
'$lib/elasticsearch/types/params.js'; 2 | import { 3 | integer, 4 | float, 5 | recordLike 6 | } from '$lib/types/index.js'; 7 | 8 | export default { 9 | id: 'string_stats', 10 | doc: { 11 | count: 'The number of non-empty fields counted.', 12 | min_length: 'The length of the shortest term.', 13 | max_length: 'The length of the longest term.', 14 | avg_length: 'The average length computed over all terms.', 15 | entropy: 'The Shannon Entropy value computed over all terms collected by the aggregation. Shannon entropy quantifies the amount of information contained in the field. It is a very useful metric for measuring a wide range of properties of a data set, such as diversity, similarity, randomness etc.' 16 | }, 17 | shape: { 18 | count: integer, 19 | min_length: integer, 20 | max_length: integer, 21 | avg_length: float, 22 | entropy: float, 23 | distribution: optional(recordLike({ 24 | values: float 25 | })) 26 | }, 27 | tag: 'multi-value', 28 | } 29 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/min.js: -------------------------------------------------------------------------------- 1 | import {esNumericButBoolean} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 2 | import {optional, script} from '$lib/elasticsearch/types/params.js'; 3 | import {string, number} from '$lib/types/index.js'; 4 | import { 5 | field, 6 | missing, 7 | script as scriptDoc, 8 | } from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 9 | import response from '$lib/elasticsearch/aggs/response/value.js'; 10 | 11 | export default { 12 | id: 'min', 13 | availability: { 14 | from: '1.3' 15 | }, 16 | docPath: '/search-aggregations-metrics-min-aggregation.html', 17 | docs: 'Returns the minimum value among numeric values extracted from the aggregated documents.', 18 | fieldType: esNumericButBoolean, 19 | label: 'Min', 20 | request: { // [0] 21 | field: string, 22 | missing: optional(number), 23 | script: optional(script), 24 | }, 
25 | requestDoc: { 26 | field, 27 | missing, 28 | script: scriptDoc 29 | }, 30 | response, 31 | tag: 'metric', 32 | version: '7.9', 33 | }; 34 | 35 | // [0] 7.9: no params table 36 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/avg.js: -------------------------------------------------------------------------------- 1 | import {esNumericButBoolean} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 2 | import {optional, script} from '$lib/elasticsearch/types/params.js'; 3 | import {string, number} from '$lib/types/index.js'; 4 | import { 5 | field, 6 | missing, 7 | script as scriptDoc 8 | } from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 9 | import response from '$lib/elasticsearch/aggs/response/value.js'; 10 | 11 | export default { 12 | id: 'avg', 13 | availability: { 14 | from: '1.3' 15 | }, 16 | docPath: '/search-aggregations-metrics-avg-aggregation.html', 17 | docs: 'Computes the average of numeric values that are extracted from the aggregated documents.', 18 | fieldType: esNumericButBoolean, 19 | label: 'Average', 20 | request: { // [0] 21 | field: string, 22 | missing: optional(number), 23 | script: optional(script), 24 | }, 25 | requestDoc: { 26 | field, 27 | missing, 28 | script: scriptDoc 29 | }, 30 | response, 31 | tag: 'metric', 32 | version: '7.9', 33 | }; 34 | 35 | // [0] 7.9: no params table 36 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/max.js: -------------------------------------------------------------------------------- 1 | import {esNumericButBoolean} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 2 | import {optional, script} from '$lib/elasticsearch/types/params.js'; 3 | import {string, number} from '$lib/types/index.js'; 4 | import { 5 | field, 6 | missing, 7 | script as scriptDoc, 8 | } from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 9 | import response from 
'$lib/elasticsearch/aggs/response/value.js'; 10 | 11 | export default { 12 | id: 'max', 13 | availability: { 14 | from: '1.3' 15 | }, 16 | docPath: '/search-aggregations-metrics-max-aggregation.html', 17 | docs: 'Returns the maximum value among the numeric values extracted from the aggregated documents.', 18 | fieldType: esNumericButBoolean, 19 | label: 'Max', 20 | request: { // [0] 21 | field: string, 22 | missing: optional(number), 23 | script: optional(script), 24 | }, 25 | requestDoc: { 26 | field, 27 | missing, 28 | script: scriptDoc 29 | }, 30 | response, 31 | tag: 'metric', 32 | version: '7.9', 33 | }; 34 | 35 | // [0] 7.9: no params table 36 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/response/bucketsDate.js: -------------------------------------------------------------------------------- 1 | import {arrayOf, integer, objectOf} from '$lib/types/index.js'; 2 | import {esDates} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 3 | 4 | export default { 5 | id: 'buckets_date', 6 | doc: { 7 | key_as_string: 'The date as string (e.g. `1986-01-01T00:00:00.000Z`).', 8 | key: 'The date as timestamp (e.g. 
504921600000).', 9 | doc_count: 'The amount of documents in a bucket.', 10 | }, 11 | shape: { 12 | buckets: arrayOf(objectOf({ 13 | doc_count: integer, 14 | key_as_string: esDates, 15 | key: integer, 16 | })) 17 | }, 18 | tag: 'multi-bucket', 19 | version: '7.9', 20 | } 21 | 22 | /* 23 | { 24 | "buckets": [ 25 | { 26 | "doc_count": 1, 27 | "key_as_string": "1986-01-01T00:00:00.000Z", 28 | "key": 504921600000 29 | }, 30 | { 31 | "doc_count": 0, 32 | "key_as_string": "1987-01-01T00:00:00.000Z", 33 | "key": 536457600000 34 | }, 35 | { 36 | "doc_count": 1, 37 | "key_as_string": "1988-01-01T00:00:00.000Z", 38 | "key": 567993600000 39 | } 40 | ] 41 | } 42 | */ 43 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/response/topMetrics.js: -------------------------------------------------------------------------------- 1 | import { 2 | arrayOf, 3 | integer, 4 | objectOf, 5 | recordLike 6 | } from '$lib/types/index.js'; 7 | import {esNumericButBoolean} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 8 | 9 | export default { 10 | id: 'top', 11 | doc: { 12 | // sort: '? 
TODO', 13 | metrics: 'An object where keys are fields and values are numbers or date strings.', 14 | top: 'The aggregation key.', 15 | }, 16 | shape: { 17 | top: arrayOf( 18 | objectOf({ 19 | sort: arrayOf(integer), 20 | metrics: recordLike({ 21 | values: esNumericButBoolean 22 | }), 23 | }) 24 | ) 25 | }, 26 | tag: 'multi-value', 27 | } 28 | 29 | 30 | /* 31 | { 32 | "top": [ 33 | { 34 | "sort": [3], 35 | "metrics": { 36 | "m": 2.718280076980591, 37 | "i": -12, 38 | "d": "2019-12-31T00:12:12.000Z" 39 | } 40 | }, 41 | {"sort": [3], "metrics": {"m": 2.718280076980591 } }, 42 | {"sort": [2], "metrics": {"m": 1.0 } }, 43 | {"sort": [1], "metrics": {"m": 3.1414999961853027 } } 44 | ] 45 | } 46 | 47 | */ 48 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/utils/coverage.js: -------------------------------------------------------------------------------- 1 | import {arraySum} from '@svizzle/utils'; 2 | import * as _ from 'lamb'; 3 | import {getBeCoverageEndpointURL} from '$lib/utils/specs.js' 4 | 5 | import {authedRequest} from '$lib/app/utils/net.js'; 6 | 7 | const transformCoverageResponse = _.pipe([ 8 | _.pairs, 9 | _.mapWith(([key, count]) => [ 10 | key, 11 | { 12 | id: key, 13 | fields: key.split('&'), 14 | count 15 | } 16 | ]), 17 | _.fromPairs 18 | ]); 19 | 20 | const fieldSetsCache = {}; 21 | 22 | export const getCoveragePromise = async dataset => { 23 | const endpoint = getBeCoverageEndpointURL(dataset); 24 | 25 | if (fieldSetsCache[endpoint]) { 26 | return fieldSetsCache[endpoint]; 27 | } 28 | 29 | const jsonResponse = await authedRequest('GET', endpoint); 30 | 31 | const fieldSetsMap = transformCoverageResponse(jsonResponse); 32 | const total = arraySum(_.values(fieldSetsMap).map(_.getPath('count'))); 33 | 34 | const result = { 35 | fieldSetsMap, 36 | total 37 | } 38 | 39 | fieldSetsCache[endpoint] = result; 40 | return result; 41 | } 42 | 
-------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/reverse_nested.todo.js: -------------------------------------------------------------------------------- 1 | import {esSearchableField} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 2 | import {optional} from '$lib/elasticsearch/types/params.js'; 3 | import {stringD} from '$lib/types/index.js'; 4 | 5 | export default { 6 | id: 'reverse_nested', 7 | availability: { 8 | from: '1.3' 9 | }, 10 | docPath: '/search-aggregations-bucket-reverse-nested-aggregation.html', 11 | docs: 'A special single bucket aggregation that enables aggregating on parent docs from nested documents. Effectively this aggregation can break out of the nested block structure and link to other nested structures or the root document, which allows nesting other aggregations that aren’t part of the nested object in a nested aggregation.', 12 | fieldType: esSearchableField, 13 | label: 'Reverse Nested', 14 | request: { 15 | // TODO Check: no `field`, no `missing` 16 | path: optional(stringD('')) 17 | }, 18 | requestDoc: { 19 | path: 'Path of the nested documents within the top level documents.' 
20 | }, 21 | subAggs: true, 22 | tag: 'bucketing', 23 | }; 24 | -------------------------------------------------------------------------------- /be/src/bin/clearCache.js: -------------------------------------------------------------------------------- 1 | import { Command } from 'commander'; 2 | import { MongoClient } from 'mongodb'; 3 | 4 | const { MONGO_ROOT_USER: user, MONGO_ROOT_PASSWORD: password } = process.env; 5 | 6 | if (!user || !password) { 7 | throw new Error(`You must set the MONGO_ROOT_USER and MONGO_ROOT_PASSWORD 8 | envrionment variables in order to use this script.`) 9 | } 10 | 11 | const uri = `mongodb://${user}:${password}@dapsboard.cache.dev.dap-tools.uk:27017`; 12 | const client = new MongoClient(uri); 13 | 14 | const database = client.db('dapsboard'); 15 | const cache = database.collection('cache'); 16 | 17 | 18 | const program = new Command(); 19 | program.requiredOption( 20 | '-i, --index ', 21 | 'ES index corresponding to the cache to clear' 22 | ); 23 | 24 | program.parse(); 25 | const options = program.opts(); 26 | 27 | const main = async () => { 28 | const cursor = cache.find({ url: { $regex: `.*/${options.index}$` } }) 29 | for await (const doc of cursor) { 30 | await cache.deleteOne(doc); 31 | } 32 | await cursor.close(); 33 | process.exit(0); 34 | } 35 | 36 | main(); 37 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/explore/coverage/utils.js: -------------------------------------------------------------------------------- 1 | import * as _ from 'lamb'; 2 | import {isIterableEmpty} from '@svizzle/utils'; 3 | 4 | /* transformations */ 5 | 6 | export const initSelectedFieldSetsMap = _.pipe([ 7 | _.mapWith(_.collect([ 8 | _.getKey('id'), 9 | _.always(false) 10 | ])), 11 | _.fromPairs 12 | ]); 13 | 14 | export const initSelectedFieldsMap = _.pipe([ 15 | _.mapWith(_.collect([ 16 | _.identity, 17 | _.always(false) 18 | ])), 19 | _.fromPairs 20 | ]); 21 | 22 | export const
makeGetFieldSetsFor = fieldIds => _.pipe([ 23 | _.pick(fieldIds), 24 | _.values, 25 | ]); 26 | 27 | export const makeGetSelectedFields = (fieldSets, fieldIds) => _.pipe([ 28 | _.pick(fieldIds), 29 | _.values, 30 | _.mapWith(_.getKey('fields')), 31 | _.unless(isIterableEmpty, _.reduceRightWith(_.intersection, fieldSets)) 32 | ]) 33 | 34 | export const getTruthyKeys = _.pipe([ 35 | _.pairs, 36 | _.filterWith(_.getAt(1)), 37 | _.mapWith(_.getAt(0)), 38 | ]); 39 | 40 | /* data formatting */ 41 | 42 | export const getPercent = (value, total) => `${Math.round(value / total * 100)}%`; 43 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/geo_bounds.js: -------------------------------------------------------------------------------- 1 | import { 2 | geoPointObject, 3 | geoPointString 4 | } from '$lib/elasticsearch/types/fields.js'; 5 | import {optional} from '$lib/elasticsearch/types/params.js'; 6 | import {booleanD, string} from '$lib/types/index.js'; 7 | import {field, missing} from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 8 | import response from '$lib/elasticsearch/aggs/response/geoBounds.js'; 9 | 10 | export default { 11 | id: 'geo_bounds', 12 | availability: { 13 | from: '1.3' 14 | }, 15 | docPath: '/search-aggregations-metrics-geobounds-aggregation.html', 16 | docs: 'Computes the bounding box containing all geo values for a field.', 17 | fieldType: geoPointObject, 18 | label: 'Geo Bounds', 19 | request: { 20 | field: string, 21 | missing: optional(geoPointString), 22 | wrap_longitude: optional(booleanD(true)), 23 | }, 24 | requestDoc: { 25 | field, 26 | missing, 27 | wrap_longitude: 'Specifies whether the bounding box should be allowed to overlap the international date line.', 28 | }, 29 | response, 30 | tag: 'metric', 31 | version: '7.9', 32 | }; 33 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/sum.js: 
-------------------------------------------------------------------------------- 1 | import {esNumericButBoolean} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 2 | import {optional, script} from '$lib/elasticsearch/types/params.js'; 3 | import {string, number} from '$lib/types/index.js'; 4 | import { 5 | field, 6 | missing, 7 | script as scriptDoc, 8 | } from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 9 | import response from '$lib/elasticsearch/aggs/response/value.js'; 10 | 11 | export default { 12 | id: 'sum', 13 | availability: { 14 | from: '1.3' 15 | }, 16 | docPath: '/search-aggregations-metrics-sum-aggregation.html', 17 | docs: 'Sums up numeric values that are extracted from the aggregated documents.', 18 | fieldType: esNumericButBoolean, 19 | label: 'Sum', 20 | request: { // [0] 21 | field: string, 22 | missing: optional(number), // [1] 23 | script: optional(script), 24 | }, 25 | requestDoc: { 26 | field, 27 | missing, 28 | script: scriptDoc, 29 | }, 30 | response, 31 | tag: 'metric', 32 | version: '7.9', 33 | }; 34 | 35 | // [0] 7.9: no params table 36 | // [1] TODO add constraint to be same type as the field type 37 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/utils/aggParams.js: -------------------------------------------------------------------------------- 1 | import * as _ from 'lamb'; 2 | import { 3 | makeIsIncluded, 4 | makeMergeAppliedFnMap, 5 | valuesWith, 6 | } from '@svizzle/utils'; 7 | import { 8 | nativeKey, 9 | shapeKey, 10 | defaultKey, 11 | isAppDefaultKey 12 | } from '$lib/types/index.js'; 13 | import {optionalKey} from '$lib/elasticsearch/types/params.js'; 14 | 15 | import {is_required} from '$lib/elasticsearch/types/params.utils.js'; 16 | 17 | const getParamInfo = valuesWith( 18 | (paramType, paramId) => ({ 19 | paramId, 20 | type: paramType, 21 | required: is_required(paramType), 22 | displayText: JSON.stringify(paramType, null, 2) 23 | }) 24 | ); 25 | 26 | export 
const getParamsInfo = _.pipe([ 27 | getParamInfo, 28 | _.filterWith(_.not(_.pipe([ 29 | _.getKey('paramId'), 30 | makeIsIncluded([nativeKey, shapeKey, optionalKey, defaultKey, isAppDefaultKey]) 31 | ]))), 32 | _.sortWith([_.not(_.getKey('required'))]) 33 | ]); 34 | 35 | export const mergeDocs = (partialParamsInfo, docs) => 36 | partialParamsInfo.map(makeMergeAppliedFnMap({ 37 | documentation: param => docs[param.paramId] 38 | })); 39 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/stats.js: -------------------------------------------------------------------------------- 1 | import {esNumericButBoolean} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 2 | import {optional, script} from '$lib/elasticsearch/types/params.js'; 3 | import {string, number} from '$lib/types/index.js'; 4 | import { 5 | field, 6 | missing, 7 | script as scriptDoc, 8 | } from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 9 | import response from '$lib/elasticsearch/aggs/response/stats.js'; 10 | 11 | export default { 12 | id: 'stats', 13 | availability: { 14 | from: '1.3' 15 | }, 16 | docPath: '/search-aggregations-metrics-stats-aggregation.html', 17 | docs: 'Computes stats over numeric values extracted from the aggregated documents.', 18 | fieldType: esNumericButBoolean, 19 | label: 'Stats', 20 | request: { // [0] 21 | field: string, 22 | missing: optional(number), // [1] 23 | script: optional(script), 24 | }, 25 | requestDoc: { 26 | field, 27 | missing, 28 | script: scriptDoc, 29 | }, 30 | response, 31 | tag: 'metric', 32 | version: '7.9', 33 | }; 34 | 35 | // [0] 7.9: no params table 36 | // [1] TODO add constraint to be same type as the field type 37 | -------------------------------------------------------------------------------- /fe/src/lib/utils/svizzle/utils/[any-array]-[array-object].js: -------------------------------------------------------------------------------- 1 | /** 2 | * @module 
@svizzle/utils/[any-array]-[array-object] 3 | */ 4 | 5 | import * as _ from 'lamb'; 6 | 7 | /** 8 | * Return a function expecting an array and returning an object 9 | * with keys and values defined by the provided function, which expects a value 10 | * (and, optionally, its index) and returns a pair [key, value]. 11 | * 12 | * @function 13 | * @arg {function} valueIndexToPair - ((Any, Number) -> Array) Turns a value and its index into a pair 14 | * @return {function} - (Array -> Object) 15 | * 16 | * @example 17 | > valueToPair = x => [`${x}${x}`, `${x}${x}${x}`]; 18 | > arrayToObject1 = arrayToObjectWith(valueToPair) 19 | > arrayToObject1(['a', 'b', 1]) 20 | {aa: 'aaa', bb: 'bbb', 11: '111'} 21 | > 22 | > valueIndexToPair = (x, i) => [`${i}${i}`, `${x}${x}${x}`]; 23 | > arrayToObject2 = arrayToObjectWith(valueIndexToPair) 24 | > arrayToObject2(['a', 'b', 1]) 25 | {'00': 'aaa', '11': 'bbb', '22': '111'} 26 | * 27 | * @version next 28 | */ 29 | export const arrayToObjectWith = valueIndexToPair => _.pipe([ 30 | _.mapWith(valueIndexToPair), 31 | _.fromPairs, 32 | ]); 33 | // FIXME unused as of now 34 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/explore/route.config.js: -------------------------------------------------------------------------------- 1 | import {historyConfig} from './history.config.js'; 2 | import {resultsConfig} from './results.config.js'; 3 | import {searchConfig} from './search.config.js'; 4 | import {selectingConfig} from './selecting.config.js'; 5 | 6 | export const exploreConfig = { 7 | id: 'ExploreRoute', 8 | initial: 'Interactive', 9 | states: { 10 | Interactive: { 11 | type: 'parallel', 12 | states: { 13 | // order counts for rendering in the visualiser 14 | History: historyConfig, 15 | Selecting: selectingConfig, 16 | Results: resultsConfig, 17 | Search: searchConfig 18 | }, 19 | on: { 20 | DATASET_UPDATED: { 21 | target: '#ExploreRoute.Interactive', 22 | actions: [ 23 | 'selectDataset', 24 | 'setDataset', 25 | 'setNeededFields', 26 |
'setURL', 27 | ] 28 | }, 29 | RESET_SOURCES: { 30 | target: '#ExploreRoute.Interactive', 31 | actions: ['resetSources'] 32 | }, 33 | SELECT_SOURCE: { 34 | target: '#ExploreRoute.Interactive', 35 | actions: ['selectSource'] 36 | }, 37 | }, 38 | } 39 | } 40 | }; 41 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/response/bucketsRange.js: -------------------------------------------------------------------------------- 1 | import {optional} from '$lib/elasticsearch/types/params.js'; 2 | import { 3 | arrayOf, 4 | integer, 5 | string, 6 | number, 7 | objectOf, 8 | } from '$lib/types/index.js'; 9 | 10 | export default { 11 | id: 'buckets_range', 12 | doc: { 13 | doc_count: 'The amount of documents in a bucket.', 14 | from: 'Start of the range (included).', 15 | key: 'The range key (e.g. `*-100.0`, `100.0-200.0`, `200.0-*`, or custom if ranges are named`).', 16 | to: 'End of the range (excluded).', 17 | }, 18 | shape: { 19 | buckets: arrayOf( 20 | objectOf({ 21 | doc_count: integer, 22 | from: optional(number), 23 | key: string, 24 | to: optional(number), 25 | }) 26 | ) 27 | }, 28 | tag: 'multi-bucket', 29 | } 30 | 31 | /* 32 | { 33 | "buckets": [ 34 | { 35 | "doc_count": 2, 36 | "key": "*-100.0", 37 | "to": 100.0 38 | }, 39 | { 40 | "doc_count": 2, 41 | "from": 100.0, 42 | "key": "100.0-200.0", 43 | "to": 200.0 44 | }, 45 | { 46 | "doc_count": 3, 47 | "from": 200.0, 48 | "key": "200.0-*" 49 | } 50 | ] 51 | } 52 | */ 53 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Nesta 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, 
merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/elementary/Tab.svelte: -------------------------------------------------------------------------------- 1 | 22 | 23 | {#if isTitleSlot} 24 | 32 | {/if} 33 | 34 | {#if isContentSlot && isSelected} 35 | 36 | {/if} 37 | 38 | 49 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/explore/viz/displays/LevelDisplay.svelte: -------------------------------------------------------------------------------- 1 | 14 | 15 |
    16 | {#each aggs as aggsForKey} 17 | {@const path = aggsForKey.path} 18 | 19 |

    20 | {getSummary(path[1])} 21 | by 22 | {getSummary(path[0])} 23 |

    24 |
    25 | {#each aggsForKey.result as agg} 26 | 31 | {/each} 32 |
    33 |
    34 | {/each} 35 |
    36 | 37 | 44 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/types/fields.utils.js: -------------------------------------------------------------------------------- 1 | import * as _ from 'lamb'; 2 | import {makeIsIncluded} from '@svizzle/utils'; 3 | 4 | /* ES string fields */ 5 | 6 | export const isWithKeywordTypeId = 7 | makeIsIncluded(['textWithKeyword', 'textWithKeywordArray']); 8 | 9 | export const keywordFieldTypes = [ 10 | 'keyword', 11 | 'keywordArray', 12 | 'textWithKeyword', 13 | 'textWithKeywordArray' 14 | ]; 15 | 16 | /* all */ 17 | 18 | export const makeGetFieldsOfTypes = types => _.pipe([ 19 | _.pairs, 20 | _.filterWith(_.pipe([ 21 | _.getPath('1.type'), 22 | makeIsIncluded(types) 23 | ])), 24 | _.mapWith(_.getAt(0)) 25 | ]); 26 | 27 | /* dates */ 28 | 29 | /* TODO 30 | export const isValidDate 31 | - isString 32 | 33 | export const isValidDate_YYYYMMDD_dash 34 | - regex passes date_YYYYMMDD_dash.format.value 35 | 36 | export const date_YYYYMMDD_dash_time 37 | - regex passes date_YYYYMMDD_dash_time.format.value 38 | */ 39 | 40 | 41 | /* Geo-point datatype */ 42 | 43 | /* TODO 44 | export const isValidGeoPointObj 45 | - isShape('object') 46 | - lat passes numberWithin([-90, 90]), 47 | - lon passes numberWithin([-180, 180]), 48 | */ 49 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/string_stats.js: -------------------------------------------------------------------------------- 1 | import {esSearchableString} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 2 | import {optional, script} from '$lib/elasticsearch/types/params.js'; 3 | import {booleanD, string} from '$lib/types/index.js'; 4 | import { 5 | field, 6 | missing, 7 | script as scriptDoc, 8 | } from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 9 | import response from '$lib/elasticsearch/aggs/response/stringStats.js'; 10 | 11 | export default { 12 | id: 
'string_stats', 13 | availability: { 14 | from: '7.6' 15 | }, 16 | docPath: '/search-aggregations-metrics-string-stats-aggregation.html', 17 | docs: 'Computes statistics over string values extracted from the aggregated documents', 18 | fieldType: esSearchableString, 19 | label: 'String Stats', 20 | request: { // [0] 21 | field: string, 22 | missing: optional(string), 23 | script: optional(script), 24 | show_distribution: optional(booleanD(false)), 25 | }, 26 | requestDoc: { 27 | field, 28 | missing, 29 | show_distribution: 'TODO', 30 | script: scriptDoc, 31 | }, 32 | response, 33 | tag: 'metric', 34 | version: '7.9', 35 | }; 36 | 37 | // [0] 7.9: no params table 38 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/boxplot.js: -------------------------------------------------------------------------------- 1 | import {optional, script} from '$lib/elasticsearch/types/params.js'; 2 | import {integerD, number, string} from '$lib/types/index.js'; 3 | import { 4 | field, 5 | missing, 6 | script as scriptDoc 7 | } from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 8 | import response from '$lib/elasticsearch/aggs/response/boxplot.js'; 9 | 10 | export default { 11 | id: 'boxplot', 12 | availability: { 13 | from: '7.7' 14 | }, 15 | docPath: '/search-aggregations-metrics-boxplot-aggregation.html', 16 | docs: 'Computes boxplot of numeric values extracted from the aggregated documents. ', 17 | fieldType: number, 18 | label: 'Boxplot', 19 | request: { // [0] 20 | compression: optional(integerD(100)), 21 | field: string, 22 | missing: optional(number), 23 | script: optional(script) 24 | }, 25 | requestDoc: { 26 | compression: 'Approximate algorithms must balance memory utilization with estimation accuracy. 
This balance can be controlled using a compression parameter.', 27 | field, 28 | missing, 29 | script: scriptDoc 30 | }, 31 | response, 32 | tag: 'metric', 33 | version: '7.9', 34 | }; 35 | 36 | // [0] 7.9: no params table 37 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/response/bucketsTextScore.js: -------------------------------------------------------------------------------- 1 | import { 2 | arrayOf, 3 | integer, 4 | number, 5 | objectOf, 6 | string, 7 | } from '$lib/types/index.js'; 8 | 9 | export default { 10 | id: 'buckets_text_score', 11 | doc: { 12 | '{bucket}.bg_count': 'TODO.', 13 | '{bucket}.doc_count': 'The amount of documents in a bucket.', 14 | '{bucket}.key': 'The bucket term.', 15 | '{bucket}.score': 'The bucket score.', 16 | bg_count: 'TODO', 17 | doc_count: 'The amount of documents in a bucket.', 18 | }, 19 | shape: { 20 | bg_count: integer, 21 | buckets: arrayOf( 22 | objectOf({ 23 | bg_count: integer, 24 | doc_count: integer, 25 | key: string, 26 | score: number, 27 | }) 28 | ), 29 | doc_count: integer, 30 | }, 31 | tag: 'multi-bucket', 32 | } 33 | 34 | /* 35 | { 36 | "bg_count": 4814301, 37 | "buckets": [ 38 | { 39 | "bg_count": 751990, 40 | "doc_count": 503375, 41 | "key": "North America", 42 | "score": 0.7828902342051492 43 | }, 44 | { 45 | "bg_count": 1926, 46 | "doc_count": 1247, 47 | "key": "Europe", 48 | "score": 0.0018404593616494103 49 | } 50 | ], 51 | "doc_count": 1153445 52 | } 53 | 54 | */ 55 | -------------------------------------------------------------------------------- /fe/src/routes/+page.svelte: -------------------------------------------------------------------------------- 1 | 2 | Dapsboard - Home 3 | 4 | 5 |
    6 |
    7 |

    Nesta DAPS dashboard

    8 |

    Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris feugiat diam et felis bibendum fringilla. Integer finibus rutrum luctus. Nullam porttitor ante tortor. Aenean iaculis lectus ut aliquam lacinia. Aliquam ornare urna libero. Quisque est libero, porttitor ut bibendum ut, iaculis in nunc. Aliquam lectus erat, pretium eu finibus nec, vulputate non lectus. Proin facilisis iaculis sapien, in malesuada lectus molestie quis. Maecenas in nisi eu eros molestie consectetur. Cras tincidunt quis lectus id maximus.

    9 |
    10 |
    11 | 12 | 36 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/AggSelector.svelte: -------------------------------------------------------------------------------- 1 | 14 | 15 |
    {title}
    16 | 36 | 37 | 48 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/builder/tester.config.js: -------------------------------------------------------------------------------- 1 | import { routeConfig } from './route.config.js'; 2 | 3 | const tester_context = { 4 | autoExecute: false, 5 | cached: false, 6 | hideDisabledAxes: true, 7 | hideDisabledAggs: false, 8 | hideDisabledDatasets: false, 9 | hideDisabledItems: true, 10 | matching: false, 11 | selectionComplete: false, 12 | showFullResponse: false, 13 | queryReady: false 14 | }; 15 | 16 | export const builderTesterConfig = { 17 | id: 'TestingBuilder', 18 | type: 'parallel', 19 | context: tester_context, 20 | states: { 21 | GuardsConfig: { 22 | initial: 'Idle', 23 | states: { 24 | Idle: { 25 | on: { 26 | SELECTION_COMPLETE_TOGGLED: { 27 | target: 'Idle', 28 | actions: ['toggleSelectionComplete'] 29 | }, 30 | QUERY_READY_TOGGLED: { 31 | target: 'Idle', 32 | actions: ['toggleQueryReady'] 33 | }, 34 | MATCHING_TOGGLED: { 35 | target: 'Idle', 36 | actions: ['toggleMatching'] 37 | }, 38 | CACHED_TOGGLED: { 39 | target: 'Idle', 40 | actions: ['toggleCached'] 41 | } 42 | } 43 | } 44 | } 45 | }, 46 | BuilderRoute: routeConfig 47 | } 48 | }; 49 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/explore/viz/AggResults.svelte: -------------------------------------------------------------------------------- 1 | 22 | 23 | {#if results} 24 | {#each resultsByFieldSets as [key, aggs]} 25 | {@const depth = key.split(',').length - 1} 26 | 27 |

    Fields {key}

    28 | 35 |
    36 | {/each} 37 | {/if} 38 | 39 | 46 | -------------------------------------------------------------------------------- /fe/svelte.config.js: -------------------------------------------------------------------------------- 1 | import path from 'node:path'; 2 | import {fileURLToPath} from 'node:url'; 3 | 4 | import adapterAuto from '@sveltejs/adapter-auto'; 5 | import adapterNetlify from '@sveltejs/adapter-netlify'; 6 | import adapterStatic from '@sveltejs/adapter-static'; 7 | 8 | import {readJson} from '@svizzle/file'; 9 | 10 | const __dirname = path.dirname(fileURLToPath(import.meta.url)); 11 | const SOURCES_PATH = path.resolve(__dirname, './src/lib/app/data/sidebar.json') 12 | const sources = await readJson(SOURCES_PATH); 13 | 14 | const sourceIds = sources.map(({source}) => source); 15 | const exploreRoutes = sourceIds.map(id => `/explore/${id}`); 16 | 17 | // eslint-disable-next-line no-process-env 18 | const {ADAPTER} = process.env; 19 | 20 | const adapterMap = { 21 | auto: adapterAuto(), 22 | netlify: adapterNetlify({ 23 | edge: false, 24 | split: false 25 | }), 26 | static: adapterStatic() 27 | } 28 | 29 | /** @type {import('@sveltejs/kit').Config} */ 30 | const config = { 31 | extensions: ['.svelte', '.md'], 32 | kit: { 33 | adapter: adapterMap[ADAPTER] || adapterMap.auto, 34 | prerender: { 35 | entries: ['*', ...exploreRoutes] 36 | } 37 | } 38 | }; 39 | 40 | export default config; 41 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/extended_stats.js: -------------------------------------------------------------------------------- 1 | import {esNumeric} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 2 | import {optional, script} from '$lib/elasticsearch/types/params.js'; 3 | import {floatD, number, string} from '$lib/types/index.js'; 4 | import { 5 | field, 6 | missing, 7 | script as scriptDoc, 8 | } from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 9 | import response from 
'$lib/elasticsearch/aggs/response/extendedStats.js'; 10 | 11 | export default { 12 | id: 'extended_stats', 13 | availability: { 14 | from: '1.3' 15 | }, 16 | docPath: '/search-aggregations-metrics-extendedstats-aggregation.html', 17 | docs: 'Calculates an approximate count of distinct values.', 18 | label: 'Extended Stats', 19 | fieldType: esNumeric, 20 | request: { // [0] 21 | field: string, 22 | missing: optional(number), 23 | script: optional(script), 24 | sigma: optional(floatD(2)), 25 | }, 26 | requestDoc: { 27 | field, 28 | missing, 29 | script: scriptDoc, 30 | sigma: 'Can be any non-negative double which controls how many standard deviations +/- from the mean should be displayed.' 31 | }, 32 | response, 33 | tag: 'metric', 34 | version: '7.9', 35 | }; 36 | 37 | // [0] 7.9: no params table 38 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/explore/suggestions/FieldMenu.svelte: -------------------------------------------------------------------------------- 1 | 14 | 15 | {#each fieldStats as field} 16 |
    selectField(field.name)} 21 | > 22 | {field.name} 23 | {field.count} 24 |
    25 | {/each} 26 | 27 | 47 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/elementary/Collapsible.svelte: -------------------------------------------------------------------------------- 1 | 12 | 13 |
    14 |
    15 | 23 | 24 |
    25 |
    26 | 27 |
    28 |
    29 | 30 | 59 | -------------------------------------------------------------------------------- /be/src/coverage.js: -------------------------------------------------------------------------------- 1 | import * as _ from 'lamb'; 2 | 3 | import { count, getMappings } from 'dap_dv_backends_utils/es/index.mjs'; 4 | import { scroll } from 'dap_dv_backends_utils/es/search.mjs'; 5 | 6 | const isFieldInDoc = (field, doc) => { 7 | if (field in doc) { 8 | return doc[field] !== null; 9 | } 10 | return false; 11 | } 12 | 13 | const computeSet = (fields, doc) => { 14 | const set = _.filter(fields, f => isFieldInDoc(f, doc)); 15 | const setString = _.join(set, '&'); 16 | return setString; 17 | } 18 | 19 | export const coverage = async (domain, index) => { 20 | const total = await count(domain, index); 21 | const mapping = await getMappings(domain, index); 22 | const fields = _.keys( 23 | mapping[index].mappings.properties 24 | || mapping[index].mappings._doc.properties 25 | ); 26 | const scroller = scroll(domain, index, {size: 10000}); 27 | const counts = {}; 28 | let progress = 0; 29 | let page; 30 | for await (page of scroller) { 31 | _.forEach( 32 | page.hits.hits, 33 | doc => { 34 | const set = computeSet(fields, doc._source); 35 | counts[set] = set in counts ? 
counts[set] + 1 : 1; 36 | } 37 | ); 38 | progress += page.hits.hits.length; 39 | console.log(progress / total); 40 | } 41 | return counts; 42 | } 43 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/ref/aggToResponseType.js: -------------------------------------------------------------------------------- 1 | export default { 2 | "auto_date_histogram": "buckets_date", 3 | "avg": "value", 4 | "boxplot": "boxplot", 5 | "cardinality": "value", 6 | "date_histogram": "buckets_date", 7 | "date_range": "buckets_date_range", 8 | "extended_stats": "extended_stats", 9 | "geo_bounds": "geo_bounds", 10 | "geo_centroid": "geo_centroid", 11 | "geo_distance": "buckets_range", 12 | "geohash_grid": "buckets_geohash_grid", 13 | "geotile_grid": "buckets_geotile_grid", 14 | "histogram": "buckets_number", 15 | "max": "value", 16 | "median_absolute_deviation": "value", 17 | "min": "value", 18 | "missing": "doc_count", 19 | "percentile_ranks": "numkeyToNum", 20 | "percentiles": "numkeyToNum", 21 | "range": "buckets_range", 22 | "rare_terms": "buckets_number", 23 | "rate": "value", 24 | "scripted_metric": "value", 25 | "significant_terms": "buckets_text_score", 26 | "significant_text": "buckets_text_score", 27 | "stats": "stats", 28 | "string_stats": "string_stats", 29 | "sum": "value", 30 | "t_test": "value", 31 | "terms": "buckets_terms", 32 | "top_hits": "hits", 33 | "top_metrics": "top", 34 | "value_count": "value", 35 | "variable_width_histogram": "buckets_number", 36 | "weighted_avg": "value" 37 | } 38 | -------------------------------------------------------------------------------- /specs/indices/hpmt_epc_v0.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | - 3 | provider_name: EPC 4 | provider_url: https://epc.opendatacommunities.org/ 5 | dataset: 6 | api_type: ElasticSearch 7 | api_version: 7.10 8 | config_url: 
https://gist.githubusercontent.com/doogyb/05f58239c674feb0975b7c582c993b1a/raw/177fca03e4ec82f188294b6dc5129ce246ecdae3/epc_with_hp_install_dates_v0_config.json 9 | endpoint_url: https://es.annotations.dap-tools.uk/epc_with_hp_install_dates 10 | provider_name: DAPS (Nesta) 11 | provider_url: https://github.com/nestauk/nesta 12 | schema: 13 | postcode: 14 | type: textWithKeyword 15 | postcode_district: 16 | type: textWithKeyword 17 | tenure: 18 | type: textWithKeyword 19 | built_form: 20 | type: textWithKeyword 21 | property_type: 22 | type: textWithKeyword 23 | construction_age_band: 24 | type: textWithKeyword 25 | number_habitable_rooms: 26 | type: float 27 | mains_gas_flag: 28 | type: textWithKeyword 29 | energy_consumption_current: 30 | type: float 31 | current_energy_rating: 32 | type: textWithKeyword 33 | country: 34 | type: textWithKeyword 35 | hp_installed: 36 | type: boolean 37 | hp_install_date: 38 | type: date 39 | version: 0 -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/README.md: -------------------------------------------------------------------------------- 1 | # ElasticSearch Aggregations 2 | 3 | ## Composition Rules 4 | 5 | * Metric aggregations can't have children 6 | * Most bucketing aggregations can have other bucketing or metric 7 | aggregations as children. 8 | * The `Global` aggregator can only be top level. 9 | * The `Reverse Nested` aggregator can only be defined inside a `Nested` 10 | aggregation. 11 | * The `Significant Text` aggregation can be used anywhere but: 12 | > Re-analyzing large result sets will require a lot of time and memory. It is 13 | > recommended that the `significant_text` aggregation is used as a child of 14 | > either the `sampler` or `diversified sampler` aggregation to limit the 15 | > analysis to a small selection of top-matching documents e.g. 200. This will 16 | > typically improve speed, memory use and quality of results. 
17 | 18 | 19 | # ElasticSearch Filters 20 | 21 | ## `geo_bounding_box` 22 | 23 | See https://www.elastic.co/guide/en/elasticsearch/reference/7.0/search-aggregations-bucket-geotilegrid-aggregation.html#_high_precision_requests_2 24 | 25 | > When requesting detailed buckets (typically for displaying a "zoomed in" map) a filter like `geo_bounding_box` should be applied to narrow the subject area otherwise potentially millions of buckets will be created and returned. 26 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/utils/version.spec.js: -------------------------------------------------------------------------------- 1 | import assert from 'node:assert'; 2 | 3 | import {makeIsAggVersionCompatible} from './version.js'; 4 | 5 | describe('elasticsearch/aggs/utils/version', function () { 6 | describe('makeIsAggVersionCompatible', function () { 7 | const isCompatible = makeIsAggVersionCompatible('6.3'); 8 | 9 | it('same `from`: OK', function () { 10 | assert.deepStrictEqual(isCompatible({ 11 | availability: {from: '6.3'} 12 | }), true); 13 | }); 14 | it('lower `from`: OK', function () { 15 | assert.deepStrictEqual(isCompatible({ 16 | availability: {from: '6.1'} 17 | }), true); 18 | }); 19 | it('higher `from`: fails', function () { 20 | assert.deepStrictEqual(isCompatible({ 21 | availability: {from: '7.0'} 22 | }), false); 23 | }); 24 | it('same `to`: OK', function () { 25 | assert.deepStrictEqual(isCompatible({ 26 | availability: {from: '0.1', to: '6.3'} 27 | }), true); 28 | }); 29 | it('higher `to`: OK', function () { 30 | assert.deepStrictEqual(isCompatible({ 31 | availability: {from: '0.1', to: '6.9'} 32 | }), true); 33 | }); 34 | it('lower `to`: fails', function () { 35 | assert.deepStrictEqual(isCompatible({ 36 | availability: {from: '0.1', to: '5.4'} 37 | }), false); 38 | }); 39 | }); 40 | }); 41 | -------------------------------------------------------------------------------- 
/**
 * Print the details of a failed request to the console.
 *
 * @param {...*} details - A context label followed by the values to print
 * (all of them are printed, including the stack).
 */
function logError (...details) {
	console.log('Error: ', ...details);
}

/**
 * Perform an HTTP request and return the parsed response body.
 *
 * @param {string} method - HTTP method.
 * @param {string} url - Target URL.
 * @param {object} [options]
 * @param {object} [options.headers={}] - Request headers; copied, never mutated.
 * @param {*} [options.data] - Request payload. A `FormData` instance is sent
 * as is, anything else truthy is serialised as JSON.
 * @param {Function} [options.fetch=globalThis.fetch] - `fetch` implementation.
 * @param {string} [options.type='json'] - Name of the response method used to
 * read the body (e.g. `json`, `text`).
 * @returns {Promise<*>} The body as returned by `response[type]()`.
 * @throws {Error} When the request fails, the status is not 2xx or the body
 * can't be read. If a response was received the error carries `httpStatus`,
 * `httpStatusText` and, when the body is valid JSON, `jsonMessage`.
 */
export async function request (
	method,
	url,
	{ headers = {}, data, fetch = globalThis.fetch, type = 'json' } = {}
) {
	// Work on a copy so that the caller's headers object is left untouched.
	const reqOptions = { method, headers: { ...headers } };

	if (data) {
		if (typeof FormData !== 'undefined' && data instanceof FormData) {
			// Do not set `content-type` here: fetch has to generate it, because
			// only fetch knows the multipart boundary of the encoded body.
			reqOptions.body = data;
		} else {
			reqOptions.headers['content-type'] = 'application/json';
			reqOptions.body = JSON.stringify(data);
		}
	}

	let response;
	try {
		response = await fetch(url, reqOptions);
		if (Math.floor(response.status / 100) !== 2) {
			throw new Error('API request failed!');
		}
		// Awaited on purpose: body parsing failures must reach the catch below.
		return await response[type]();
	} catch (error) {
		if (response) {
			try {
				error.jsonMessage = await response.clone().json();
			} catch {
				// Not every HTTP error response has a JSON body (and the body
				// might have been consumed already): keep the original error.
			}
			error.httpStatus = response.status;
			error.httpStatusText = response.statusText;
		}

		logError('request() failure', error, error.stack);
		throw error;
	}
}
`*-100.0`, `100.0-200.0`, `200.0-*`, or custom if ranges are named`).', 17 | to_as_string: 'End of the range (excluded) - as a string.', 18 | to: 'End of the range (excluded).', 19 | }, 20 | shape: { 21 | buckets: arrayOf( 22 | objectOf({ 23 | doc_count: integer, 24 | from_as_string: optional(string), 25 | from: optional(number), 26 | key: string, 27 | to_as_string: optional(string), 28 | to: optional(number), 29 | }) 30 | ) 31 | }, 32 | tag: 'multi-bucket', 33 | } 34 | 35 | /* 36 | { 37 | "buckets": [ 38 | { 39 | "doc_count": 7, 40 | "key": "*-10-2015", 41 | "to_as_string": "10-2015", 42 | "to": 1.4436576E12 43 | }, 44 | { 45 | "doc_count": 0, 46 | "from_as_string": "10-2015", 47 | "from": 1.4436576E12, 48 | "key": "10-2015-*" 49 | } 50 | ] 51 | } 52 | */ 53 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/response/hits.js: -------------------------------------------------------------------------------- 1 | import { 2 | arrayOf, 3 | enumsOf, 4 | float, 5 | integer, 6 | object, 7 | objectOf, 8 | unionOf, 9 | } from '$lib/types/index.js'; 10 | 11 | export default { 12 | id: 'hits', 13 | doc: { 14 | 'hits.total.value': 'The amount of selected top documents in a certain bucket.', // inferred, TBD 15 | 'hits.total.relation': 'TODO.', 16 | 'hits.max_score': 'The max score among the selected top documents', // inferred, TBD 17 | 'hits.hits': 'The array of selected top documents', 18 | }, 19 | docLong: {}, 20 | shape: { 21 | hits: objectOf({ 22 | total: objectOf({ 23 | value: integer, 24 | relation: enumsOf(['eq']) 25 | }), 26 | max_score: unionOf(float, null), 27 | hits: arrayOf(object) // array of documents 28 | }) 29 | }, 30 | tag: 'sub-aggregator', // you don't use it to get a value 31 | } 32 | 33 | /* 34 | { 35 | "hits": { 36 | "total": { 37 | "relation": "eq", 38 | "value": 3 39 | }, 40 | "max_score": null, 41 | "hits": [ 42 | { 43 | "_index": "sales", 44 | "_type": "_doc", 45 | "_id": 
"AVnNBmauCQpcRyxw6ChK", 46 | "_source": { 47 | "date": "2015/03/01 00:00:00", 48 | "price": 200 49 | }, 50 | "sort": [ 51 | 1425168000000 52 | ], 53 | "_score": null 54 | } 55 | ] 56 | } 57 | } 58 | 59 | */ 60 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/utils/query.js: -------------------------------------------------------------------------------- 1 | import * as _ from 'lamb'; 2 | 3 | import {getDefault, hasDefault} from '$lib/types/index.js'; 4 | import {stopWords} from '$lib/utils/stopwords.js'; 5 | 6 | // params that startsWith('__') should be required, with no default 7 | export const makeRequestToQuery = fieldName => _.pipe([ 8 | _.collect([ 9 | obj => _.has(obj, 'field') ? {field: fieldName} : {}, 10 | _.pipe([_.pickIf(hasDefault), _.mapValuesWith(getDefault)]) 11 | ]), 12 | _.apply(_.merge) 13 | ]); 14 | 15 | /** 16 | * Get how many items have a `searchTerm` in the input `fields` 17 | */ 18 | export const getCountQuery = (fields, searchTerm) => ({ 19 | size: 0, 20 | aggs: { 21 | messages: { 22 | filters: { 23 | filters: _.fromPairs(fields.map(name => [name, { 24 | term: { 25 | [name]: searchTerm 26 | } 27 | }])) 28 | } 29 | } 30 | } 31 | }); 32 | 33 | /** 34 | * Get significant words for a `term` in the input `field` 35 | */ 36 | export const getSuggestionsQuery = (field, term) => ({ 37 | size: 0, 38 | query: { 39 | term: { 40 | [field]: term 41 | } 42 | }, 43 | aggs: { 44 | [field]: { 45 | significant_text: { 46 | field, 47 | // TODO include/exclude according to ES version 48 | include: stopWords, 49 | min_doc_count: 1, 50 | size: 20 51 | } 52 | } 53 | } 54 | }); 55 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/builder/tester.options.js: -------------------------------------------------------------------------------- 1 | import { assign } from 'xstate'; 2 | 3 | export const builderTesterOptions = { 4 | actions: { 5 | 
toggleAutoExecute: assign({ 6 | autoExecute: ctx => !ctx.autoExecute 7 | }), 8 | toggleCached: assign({ 9 | cached: ctx => !ctx.cached 10 | }), 11 | toggleHideDisabledAxes: assign({ 12 | hideDisabledAxes: ctx => !ctx.hideDisabledAxes 13 | }), 14 | toggleHideDisabledAggs: assign({ 15 | hideDisabledAggs: ctx => !ctx.hideDisabledAggs 16 | }), 17 | toggleHideDisabledDatasets: assign({ 18 | hideDisabledDatasets: ctx => !ctx.hideDisabledDatasets 19 | }), 20 | toggleHideDisabledItems: assign({ 21 | hideDisabledItems: ctx => !ctx.hideDisabledItems 22 | }), 23 | toggleShowFullResponse: assign({ 24 | showFullResponse: ctx => !ctx.showFullResponse 25 | }), 26 | toggleMatching: assign({ 27 | matching: ctx => !ctx.matching 28 | }), 29 | toggleSelectionComplete: assign({ 30 | selectionComplete: ctx => !ctx.selectionComplete 31 | }), 32 | toggleQueryReady: assign({ 33 | queryReady: ctx => !ctx.queryReady 34 | }) 35 | }, 36 | guards: { 37 | isAutoExecute: ctx => ctx.autoExecute, 38 | isInCache: ctx => ctx.cached, 39 | isMatching: ctx => ctx.matching, 40 | isSelectionComplete: ctx => ctx.selectionComplete, 41 | isQueryReady: ctx => ctx.queryReady 42 | } 43 | }; 44 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/elementary/TabContainer.svelte: -------------------------------------------------------------------------------- 1 | 4 | 5 | 28 | 29 |
    30 |
    31 | 32 |
    33 |
    34 | 35 |
    36 |
    37 | 38 | 58 | -------------------------------------------------------------------------------- /fe/.eslintrc.yml: -------------------------------------------------------------------------------- 1 | root: true 2 | extends: 3 | - '../.eslintrc.yml' 4 | - 'plugin:svelte/base' 5 | rules: 6 | node/no-missing-import: off 7 | node/no-extraneous-import: off 8 | # import/extensions: 9 | # - error 10 | # - always 11 | settings: 12 | import/parsers: 13 | svelte-eslint-parser: 14 | - '.svelte' 15 | '@babel/eslint-parser': 16 | - '.js' 17 | import/resolver: 18 | eslint-import-resolver-custom-alias: 19 | alias: 20 | '$lib': './src/lib' 21 | extensions: 22 | - '.js' 23 | overrides: 24 | - files: 25 | - '**/*.svelte' 26 | rules: 27 | svelte/html-closing-bracket-spacing: error 28 | svelte/html-quotes: 29 | - error 30 | - prefer: single 31 | svelte/indent: 32 | - off 33 | - indent: tab 34 | svelte/no-at-debug-tags: warn 35 | svelte/no-at-html-tags: off 36 | svelte/no-dupe-else-if-blocks: error 37 | svelte/no-dupe-style-properties: error 38 | svelte/no-dynamic-slot-name: error 39 | svelte/no-inner-declarations: error 40 | svelte/no-not-function-handler: error 41 | svelte/no-object-in-text-mustaches: error 42 | svelte/no-shorthand-style-property-overrides: error 43 | svelte/no-unknown-style-directive-property: warn 44 | svelte/no-unused-svelte-ignore: warn 45 | brace-style: off 46 | import/no-unresolved: off 47 | no-invalid-this: off 48 | no-new: off 49 | no-shadow: off 50 | no-self-assign: off 51 | - files: 52 | - '**/*.spec.js' 53 | env: 54 | mocha: true 55 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/cardinality.js: -------------------------------------------------------------------------------- 1 | import {esSearchableField} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 2 | import {optional, script} from '$lib/elasticsearch/types/params.js'; 3 | import {integerD, string} from '$lib/types/index.js'; 
4 | import { 5 | field, 6 | missing, 7 | script as scriptDoc, 8 | } from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 9 | import response from '$lib/elasticsearch/aggs/response/value.js'; 10 | 11 | export default { 12 | id: 'cardinality', 13 | availability: { 14 | from: '1.3' 15 | }, 16 | docPath: '/search-aggregations-metrics-cardinality-aggregation.html', 17 | docs: 'Calculates an approximate count of distinct values.', 18 | fieldType: esSearchableField, 19 | label: 'Cardinality', 20 | request: { // [0] 21 | field: string, 22 | missing: optional(string), // [1] 23 | precision_threshold: optional(integerD(3000)), 24 | script: optional(script), 25 | }, 26 | requestDoc: { 27 | field, 28 | missing, 29 | precision_threshold: 'Allows to trade memory for accuracy, and defines a unique count below which counts are expected to be close to accurate. Above this value, counts might become a bit more fuzzy. The maximum supported value is 40000, thresholds above this number will have the same effect as a threshold of 40000. 
The default value is 3000.', 30 | script: scriptDoc, 31 | }, 32 | response, 33 | tag: 'metric', 34 | version: '7.9', 35 | }; 36 | 37 | // [0] 7.9: no params table 38 | // [1] TODO add constraint to be same type as the field type 39 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/median_absolute_deviation.js: -------------------------------------------------------------------------------- 1 | import {optional, script} from '$lib/elasticsearch/types/params.js'; 2 | import { 3 | integerD, 4 | string, 5 | number 6 | } from '$lib/types/index.js'; 7 | import { 8 | field, 9 | missing, 10 | script as scriptDoc, 11 | } from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 12 | import response from '$lib/elasticsearch/aggs/response/value.js'; 13 | 14 | export default { 15 | id: 'median_absolute_deviation', 16 | availability: { 17 | from: '6.6' 18 | }, 19 | docPath: '/search-aggregations-metrics-median-absolute-deviation-aggregation.html', 20 | docs: ' Median absolute deviation is a measure of variability. It is a robust statistic, meaning that it is useful for describing data that may have outliers, or may not be normally distributed. 
For such data it can be more descriptive than standard deviation.', 21 | fieldType: number, 22 | label: 'Median Absolute Deviation', 23 | request: { // [0] 24 | compression: optional(integerD(100)), 25 | field: string, 26 | missing: optional(number), 27 | script: optional(script), 28 | }, 29 | requestDoc: { 30 | field, 31 | compression: 'The tradeoff between resource usage and accuracy of a TDigest’s quantile approximation, and therefore the accuracy of this aggregation’s approximation of median absolute deviation, is controlled by the compression parameter.', 32 | missing, 33 | script: scriptDoc, 34 | }, 35 | response, 36 | tag: 'metric', 37 | version: '7.9', 38 | }; 39 | 40 | // [0] 7.9: no params table 41 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/response/extendedStats.js: -------------------------------------------------------------------------------- 1 | import {integer, number, objectOf} from '$lib/types/index.js'; 2 | 3 | export default { 4 | id: 'extended_stats', 5 | doc: { 6 | avg: 'The average of the value extracted from the documents.', 7 | count: 'The amount of documents.', 8 | max: 'The max of all the values extracted from the documents.', 9 | min: 'The min of all the values extracted from the documents.', 10 | std_deviation: 'The standard deviation of the values extracted from the documents.', 11 | std_deviation_bounds: 'The standard deviation bounds.', 12 | sum_of_squares: 'The sum of the square of the values extracted from the documents.', 13 | sum: 'The sum of the values extracted from the documents.', 14 | variance: 'The variance of the values extracted from the documents.', 15 | }, 16 | docLong: {}, 17 | shape: { 18 | avg: number, 19 | count: integer, 20 | max: number, 21 | min: number, 22 | std_deviation: number, 23 | std_deviation_bounds: objectOf({ 24 | lower: number, 25 | upper: number 26 | }), 27 | sum_of_squares: number, 28 | sum: number, 29 | variance: number, 30 | }, 
/* context updaters: they write to the stores held in the machine context */

// Replace the whole selection with the fields carried by the event.
function selectFields (ctx, event) {
	ctx.selectedFields.set(event.fields);

	return ctx;
}

// Add the event field to the selection, or remove it if already selected.
function toggleField (ctx, event) {
	const toggleEventField = current => toggleItem(current, event.field);
	ctx.selectedFields.update(toggleEventField);

	return ctx;
}

// Overwrite the last selected field with the field name (first item) of the
// pair currently held by `pairStore`.
const replaceLastSelectedField = (ctx, pairStore) => {
	ctx.selectedFields.update(_.setAt(-1, get(pairStore)[0]));

	return ctx;
};

const selectNextField = ctx => replaceLastSelectedField(ctx, ctx.nextPair);

const selectPreviousField = ctx => replaceLastSelectedField(ctx, ctx.prevPair);

// Recompute the aggregations hierarchy for the current selection and dataset.
function updateSelectionAggsHierarchy (ctx) {
	const {project, source, version} = get(ctx.dataset);
	const hierarchy = selectionToAggsHierarchy({
		fields: get(ctx.selectedFields),
		project,
		source,
		version,
	});
	ctx.selectionAggsHierarchy.set(hierarchy);

	return ctx;
}

/* machine options for the `selecting` part of the explore machine */

const actions = {
	selectFields: assign(selectFields),
	selectNextField: assign(selectNextField),
	selectPreviousField: assign(selectPreviousField),
	sendAggsHierarchyUpdated: send('AGGS_HIERARCHY_UPDATED'),
	toggleField: assign(toggleField),
	updateSelectionAggsHierarchy: assign(updateSelectionAggsHierarchy),
};

const guards = {};

export const selectingOptions = {actions, guards};
/**
 * Create the stores tracking the field selection of the explore machine.
 *
 * @returns {object} The `selectedFields` and `selectionAggsHierarchy` writable
 * stores, plus the derived `nextPair`, `prevPair`, `isNextFieldDisabled` and
 * `isPrevFieldDisabled` stores.
 */
export function createSelectionStores () {
	const selectedFields = writable([]);

	// [field, index] pairs of the dataset fields that aren't selected yet;
	// a falsy zip or selection is passed through untouched.
	const availableFieldsZip = derived(
		[selectedDatasetFieldsZip, selectedFields],
		([zip, fields]) => {
			if (!zip) {
				return zip;
			}
			if (!fields) {
				return fields;
			}

			return _.filter(zip, ([field]) => !fields.includes(field));
		}
	);

	const lastSelectedField = derived(selectedFields, _.last);

	// index of the last selected field, `false` if it can't be determined
	const lastSelectedFieldIndex = derived(
		[selectedDatasetFieldsIndicesMap, lastSelectedField],
		([indicesMap, field]) => {
			const isKnown = isNotNil(indicesMap) && isNotNil(field);

			return isKnown && indicesMap[field];
		}
	);

	// Derive the available pair picked by `pickPair` relative to the last
	// selected index (`false` when the zip or the index are nil).
	const derivePair = pickPair => derived(
		[availableFieldsZip, lastSelectedFieldIndex],
		([zip, index]) => {
			const isReady = isNotNil(zip) && isNotNil(index);

			return isReady && pickPair(zip, index);
		}
	);

	// first available pair placed after the last selected field
	const nextPair = derivePair(
		(zip, index) => _.find(zip, ([, idx]) => idx > index)
	);
	// last available pair placed before the last selected field
	const prevPair = derivePair(
		(zip, index) => _.findLast(zip, ([, idx]) => idx < index)
	);

	const isNextFieldDisabled = derived(nextPair, _.isUndefined);
	const isPrevFieldDisabled = derived(prevPair, _.isUndefined);

	return {
		isNextFieldDisabled,
		isPrevFieldDisabled,
		nextPair,
		prevPair,
		selectedFields,
		selectionAggsHierarchy: writable({}),
	};
}
    6 | 28 |
    29 | 30 |
    31 |
    32 | 33 | 76 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/elementary/RadioList.svelte: -------------------------------------------------------------------------------- 1 | 27 | 28 |
    33 | {#if isNil(defaultValue)} 34 | 35 | 42 | 43 | 44 | {/if} 45 | {#each typeObject as item} 46 | 47 | 54 | 55 | 56 | {/each} 57 |
    58 | 59 | 73 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/ref/typeGroups.js: -------------------------------------------------------------------------------- 1 | import { 2 | date_YYYYMMDD_dash_time, 3 | date_YYYYMMDD_dash, 4 | date_YYYYMMDD_T_dash_time_ms, 5 | date, 6 | geoPointObject, 7 | keyword, 8 | keywordArray, 9 | text, 10 | textArray, 11 | textWithKeyword, 12 | textWithKeywordArray, 13 | } from '$lib/elasticsearch/types/fields.js'; 14 | import { 15 | boolean, 16 | float, 17 | integer, 18 | unionOf, 19 | } from '$lib/types/index.js'; 20 | 21 | export const esDates = unionOf( 22 | date, 23 | date_YYYYMMDD_dash, 24 | date_YYYYMMDD_dash_time, 25 | date_YYYYMMDD_T_dash_time_ms 26 | ); 27 | 28 | export const esNumeric = unionOf( 29 | boolean, 30 | date, 31 | date_YYYYMMDD_dash, 32 | date_YYYYMMDD_dash_time, 33 | float, 34 | integer, 35 | ); 36 | 37 | export const esNumericButBoolean = unionOf( 38 | date, 39 | date_YYYYMMDD_dash, 40 | date_YYYYMMDD_dash_time, 41 | float, 42 | integer, 43 | ); 44 | 45 | export const esSearchableField = unionOf( 46 | boolean, 47 | date_YYYYMMDD_dash_time, 48 | date_YYYYMMDD_dash, 49 | date, 50 | float, 51 | geoPointObject, 52 | integer, 53 | keyword, 54 | keywordArray, 55 | textWithKeyword, 56 | textWithKeywordArray, 57 | ); 58 | 59 | export const esSearchableString = unionOf( 60 | keyword, 61 | keywordArray, 62 | textWithKeyword, 63 | textWithKeywordArray, 64 | ); 65 | 66 | export const esStrings = unionOf( 67 | keyword, 68 | keywordArray, 69 | text, 70 | textArray, 71 | textWithKeyword, 72 | textWithKeywordArray, 73 | ); 74 | 75 | // TODO at some point we might need to use 76 | // https://www.elastic.co/guide/en/elasticsearch/reference/7.9/mapping-types.html 77 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/utils/aggQuery.js: 
// True for the field types that have a `.keyword` sub-field.
const hasKeywordField = _.anyOf([
	_.is('textWithKeyword'),
	_.is('textWithKeywordArray'),
]);

const createKeywordFieldQuery = fieldName => ({field: `${fieldName}.keyword`});

// Default aggregation body: target the field, or its `.keyword` sub-field
// when the field type has one.
function defaultAggregationFactory (fieldName, fieldSpec) {
	const fieldType = getESType(fieldSpec);

	return hasKeywordField(fieldType)
		? createKeywordFieldQuery(fieldName)
		: {field: fieldName};
}

// Aggregations whose body isn't built by `defaultAggregationFactory`.
// Entries for `composite`, `filter`, `filters` and `sampler` were only
// sketched (commented out) so far.
const queryFactories = {
	weighted_avg: () => ({}),
};

/**
 * Build the body of the aggregation `aggId` for the given field.
 *
 * @param {string} aggId - Aggregation id (e.g. `terms`).
 * @param {string} fieldName - Name of the field to aggregate.
 * @param {*} fieldSpec - Schema entry of the field, as accepted by `getESType`.
 * @returns {object} The aggregation body.
 */
export function buildAggregation (aggId, fieldName, fieldSpec) {
	// Own-property check: `in` would also match keys inherited from
	// `Object.prototype` (e.g. `toString`, `constructor`).
	const hasCustomFactory =
		Object.prototype.hasOwnProperty.call(queryFactories, aggId);

	return hasCustomFactory
		? queryFactories[aggId](fieldName, fieldSpec)
		: defaultAggregationFactory(fieldName, fieldSpec);
}

/**
 * Build a query requesting, for each field of the schema, all the
 * aggregations configured for its type.
 *
 * @param {object} schema - Field specs by field name.
 * @returns {object} A query with `size: 0` and aggregations keyed
 * `<fieldName>_<aggId>`.
 */
export function constructQuery (schema) {
	const aggs = {};

	// Own entries only: `for...in` would also walk inherited enumerable keys.
	for (const [fieldName, fieldSpec] of Object.entries(schema ?? {})) {
		const aggIds = aggsIdByFieldType[getESType(fieldSpec)];

		// Field types with no configured aggregations are skipped.
		for (const aggId of Object.values(aggIds ?? {})) {
			aggs[`${fieldName}_${aggId}`] = {
				[aggId]: buildAggregation(aggId, fieldName, fieldSpec),
			};
		}
	}

	return {
		size: 0,
		aggs,
	};
}
"buckets": [ 6 | { 7 | "key": "", 8 | "doc_count": 967649 9 | }, 10 | { 11 | "key": "Humans", 12 | "doc_count": 115416 13 | }, 14 | { 15 | "key": "Animals", 16 | "doc_count": 77388 17 | }, 18 | { 19 | "key": "Goals", 20 | "doc_count": 50954 21 | }, 22 | { 23 | "key": "Mice", 24 | "doc_count": 31962 25 | }, 26 | { 27 | "key": "Female", 28 | "doc_count": 23720 29 | }, 30 | { 31 | "key": "United States", 32 | "doc_count": 17385 33 | }, 34 | { 35 | "key": "Male", 36 | "doc_count": 15240 37 | }, 38 | { 39 | "key": "Rats", 40 | "doc_count": 14940 41 | }, 42 | { 43 | "key": "Child", 44 | "doc_count": 14118 45 | }, 46 | { 47 | "key": "DNA", 48 | "doc_count": 13309 49 | }, 50 | { 51 | "key": "Brain", 52 | "doc_count": 12787 53 | }, 54 | { 55 | "key": "Mutation", 56 | "doc_count": 10901 57 | }, 58 | { 59 | "key": "Public Health", 60 | "doc_count": 10590 61 | }, 62 | { 63 | "key": "Phenotype", 64 | "doc_count": 10449 65 | }, 66 | { 67 | "key": "Neoplasms", 68 | "doc_count": 10443 69 | }, 70 | { 71 | "key": "RNA, Messenger", 72 | "doc_count": 9693 73 | }, 74 | { 75 | "key": "Signal Transduction", 76 | "doc_count": 8605 77 | }, 78 | { 79 | "key": "Adult", 80 | "doc_count": 8596 81 | }, 82 | { 83 | "key": "Models, Animal", 84 | "doc_count": 8412 85 | } 86 | ] 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/percentile_ranks.js: -------------------------------------------------------------------------------- 1 | import {esNumeric} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 2 | import {optional, script} from '$lib/elasticsearch/types/params.js'; 3 | import { 4 | arrayOf, 5 | booleanD, 6 | integer, 7 | number, 8 | objectOf, 9 | string, 10 | } from '$lib/types/index.js'; 11 | import { 12 | keyed, 13 | field, 14 | missing, 15 | script as scriptDoc, 16 | } from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 17 | import response from '$lib/elasticsearch/aggs/response/numToNum.js'; 18 | 19 | 
export default { 20 | id: 'percentile_ranks', 21 | availability: { 22 | from: '1.3' 23 | }, 24 | docPath: '/search-aggregations-metrics-percentile-rank-aggregation.html', 25 | docs: 'Calculates one or more percentile ranks over numeric values extracted from the aggregated documents.', 26 | fieldType: esNumeric, 27 | label: 'Percentile Ranks', 28 | request: { // [0] 29 | field: string, 30 | hdr: optional(objectOf({ 31 | number_of_significant_value_digits: integer 32 | })), 33 | keyed: optional(booleanD(true)), 34 | missing: optional(number), 35 | script: optional(script), 36 | values: arrayOf(number), 37 | }, 38 | requestDoc: { 39 | field, 40 | hdr: '(High Dynamic Range Histogram) is an alternative implementation that can be useful when calculating percentile ranks for latency measurements as it can be faster than the t-digest implementation with the trade-off of a larger memory footprint.', 41 | keyed, 42 | missing, 43 | script: scriptDoc, 44 | values: 'List of treshold values to use for ranking.', 45 | }, 46 | response, 47 | tag: 'metric', 48 | version: '7.9', 49 | }; 50 | 51 | // [0] 7.9: no params table 52 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/scripted_metric.js: -------------------------------------------------------------------------------- 1 | import {optional, metricScript} from '$lib/elasticsearch/types/params.js'; 2 | import {any, object} from '$lib/types/index.js'; 3 | import response from '$lib/elasticsearch/aggs/response/value.js'; 4 | 5 | export default { 6 | id: 'scripted_metric', 7 | availability: { 8 | from: '1.4' 9 | }, 10 | docPath: '/search-aggregations-metrics-scripted-metric-aggregation.html', 11 | docs: 'A metric aggregation that executes using scripts to provide a metric output.', 12 | fieldType: any, 13 | label: 'Scripted Metric', 14 | request: { // [0] 15 | combine_script: metricScript, 16 | init_script: optional(metricScript), 17 | map_script: metricScript, 18 
// be/src/hooks.js
import {
	authenticate,
	parseBasicAuth
} from 'dap_dv_backends_utils/auth/authentication.mjs';
import * as _ from 'lamb';

import {CROSS_ORIGIN_DOMAINS, PROTECTED_DATASETS} from './conf.js';

const AUTH_ENDPOINT = 'https://authentication.dap-tools.uk/authenticate';

/**
 * Tells whether the request's `Origin` header matches at least one of the
 * patterns in `CROSS_ORIGIN_DOMAINS`.
 *
 * @param {object} request - request exposing a `headers` object
 * @returns {boolean} true when the origin is allowed
 */
const isCrossOriginRequestAuthorised = request => {
	const {origin} = request.headers;

	// Without this guard a missing header would be coerced to the string
	// "undefined" by RegExp#test.
	if (typeof origin !== 'string') {
		return false;
	}

	return _.reduce(
		CROSS_ORIGIN_DOMAINS,
		(acc, curr) => acc || curr.test(origin),
		false
	);
};

/**
 * Flags the request as authorised or not by setting `req.notAuthorised` and,
 * on rejection, `req.errorMessage`.
 *
 * - With an `authorization` header: it must use the Basic scheme and its
 *   email/token pair must be accepted by the authentication endpoint.
 * - Without it: the request must come from an allowed origin and must not
 *   target a protected dataset.
 *
 * @param {object} req - incoming request (`headers`, `url`); mutated in place
 * @returns {Promise<void>}
 */
export const authenticationHook = async req => {

	req.notAuthorised = false;

	if ('authorization' in req.headers) {
		const authHeader = req.headers.authorization;
		if (!authHeader.startsWith('Basic')) {
			req.notAuthorised = true;
			req.errorMessage = 'Basic Authorization Header required';

			// Stop here: parsing a non-Basic header and calling the auth service
			// with the result could throw or overwrite the message above.
			return;
		}
		const {email, token} = parseBasicAuth(authHeader);
		const isAuthorized = await authenticate(AUTH_ENDPOINT, email, token);
		// if user is authenticated then they can access all datasets
		if (!isAuthorized) {
			req.notAuthorised = true;
			req.errorMessage = 'Token cannot be authenticated';
		}
	} else {
		// check that the non-signed request is coming from dapsboard
		if (!isCrossOriginRequestAuthorised(req)) {
			req.notAuthorised = true;
			req.errorMessage = 'CORS policy is blocking this request';
		}

		// The dataset is read from the second-to-last URL segment; default to
		// an empty name when the URL has fewer than two segments instead of
		// throwing on `undefined.trim()`.
		const urlParts = _.split(req.url, '/');
		const dataset = (urlParts.at(-2) ?? '').trim();
		if (_.isIn(PROTECTED_DATASETS, dataset)) {
			req.notAuthorised = true;
			req.errorMessage = 'This dataset is protected';
		}
	}
};
This aggregator is intended to be used as a sub aggregator, so that the top matching documents can be aggregated per bucket.', 20 | fieldType: esSearchableField, // TBD 21 | label: 'Top Hits', 22 | needsParent: true, 23 | request: { // [0] 24 | _source: optional(objectOf({ 25 | includes: unionOf(string, arrayOf(string)) 26 | })), 27 | from: optional(integerD(0, true)), 28 | size: optional(integerD(3)), 29 | sort: optional(arrayOf(recordLike({ 30 | values: objectOf({ 31 | order: sortOrder 32 | }) 33 | }))), 34 | }, 35 | requestDoc: { 36 | _source: 'If _source is requested then just the part of the source of the nested object is returned, not the entire source of the document.', 37 | from: 'The offset from the first result you want to fetch.', 38 | size: 'The maximum number of top matching hits to return per bucket. By default the top three matching hits are returned.', 39 | sort: 'How the top matching hits should be sorted. By default the hits are sorted by the score of the main query.', 40 | }, 41 | response, 42 | tag: 'metric', 43 | version: '7.9', 44 | }; 45 | 46 | // [0] 7.9: no params table 47 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/elementary/BooleanRadios.svelte: -------------------------------------------------------------------------------- 1 | 26 | 27 |
    32 | {#if isNil(defaultValue)} 33 | 34 | 41 | 42 | 43 | {/if} 44 | 45 | 52 | 53 | 54 | 55 | 62 | 63 | 64 |
    65 | 66 | 80 | -------------------------------------------------------------------------------- /fe/src/lib/app/utils/net.js: -------------------------------------------------------------------------------- 1 | import {get} from 'svelte/store'; 2 | 3 | import { 4 | _credentials, 5 | _isAuthModalOpen 6 | } from '$lib/app/stores/auth.js'; 7 | import {request} from '$lib/utils/net.js'; 8 | 9 | const authBaseURL = 'https://api.dap-tools.uk/auth'; 10 | 11 | const getTokenRequestEndpointURL = email => 12 | `${authBaseURL}/request?email=${email}`; 13 | const getTokenVerifyEndpointURL = (email, token) => 14 | `${authBaseURL}/authenticate?email=${email}&token=${token}`; 15 | 16 | export const authedRequest = async ( 17 | method, 18 | url, 19 | options = {} 20 | ) => { 21 | const headers = {...options.headers}; 22 | 23 | const credentials = get(_credentials); 24 | if (credentials?.token) { 25 | const {email, token} = credentials; 26 | headers.Authorization = `Basic ${btoa(`${email}:${token}`)}`; 27 | } 28 | let response; 29 | try { 30 | response = await request( 31 | method, 32 | url, 33 | { 34 | ...options, 35 | headers 36 | } 37 | ); 38 | } catch (error) { 39 | if (error.httpStatus === 401 || error.httpStatus === 403) { 40 | _credentials.set(null); 41 | _isAuthModalOpen.set(true); 42 | } 43 | throw error; 44 | } 45 | return response; 46 | } 47 | 48 | export const requestNestaToken = async email => { 49 | const endpoint = getTokenRequestEndpointURL(email); 50 | const response = await fetch(endpoint); 51 | 52 | const result = {}; 53 | if (response.status !== 204) { 54 | result.error = await response.text(); 55 | } 56 | 57 | return result; 58 | } 59 | 60 | export const verifyNestaToken = async (email, token) => { 61 | const endpoint = getTokenVerifyEndpointURL(email, token); 62 | const response = await fetch(endpoint); 63 | const result = await response.text(); 64 | 65 | return JSON.parse(result); 66 | } 67 | 
-------------------------------------------------------------------------------- /fe/src/routes/+layout.svelte: -------------------------------------------------------------------------------- 1 | 39 | 40 | 41 | 48 | 49 |
    50 |
    52 | 53 |
    54 | 55 |
    56 | 57 | {#if $_isAuthModalOpen} 58 | 59 | {/if} 60 | 61 | 76 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/elementary/ObjectEditor.svelte: -------------------------------------------------------------------------------- 1 | 35 | 36 |
    37 | {#each aggParamsInfo as paramInfo (`${id}-${paramInfo.paramId}`)} 38 | {#if !isParamSelector 39 | || ['__selection', value?.__selection].includes(paramInfo.paramId) 40 | } 41 | handleChange({[paramInfo.paramId]: event.detail})} 51 | help={false} 52 | /> 53 | {/if} 54 | {/each} 55 |
    56 | 57 | 63 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/elementary/PanelMenu.svelte: -------------------------------------------------------------------------------- 1 | 28 | 29 |
    35 | 39 | 43 |
    44 | Settings 45 |
    46 |
      47 | 48 |
    49 |
    50 |
    51 | 52 | 87 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/rare_terms.js: -------------------------------------------------------------------------------- 1 | import {esSearchableString} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 2 | import { 3 | optional, 4 | termsExclude, 5 | termsInclude, 6 | } from '$lib/elasticsearch/types/params.js'; 7 | import { 8 | floatD, 9 | integerD, 10 | string, 11 | } from '$lib/types/index.js'; 12 | import { 13 | field, 14 | missing, 15 | termsExclude as termsExcludeDoc, 16 | termsInclude as termsIncludeDoc, 17 | } from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 18 | import response from '$lib/elasticsearch/aggs/response/bucketsDocCount.js'; 19 | 20 | export default { 21 | id: 'rare_terms', 22 | availability: { 23 | from: '7.3' 24 | }, 25 | collect_mode: `breadth_first`, 26 | docPath: '/search-aggregations-bucket-rare-terms-aggregation.html', 27 | docs: 'Multi-bucket value source based aggregation which finds "rare" terms - terms that are at the long-tail of the distribution and are not frequent.', 28 | fieldType: esSearchableString, 29 | label: 'Rare terms', 30 | request: { // [1] 31 | exclude: optional(termsExclude), 32 | field: string, 33 | include: optional(termsInclude), 34 | max_doc_count: optional(integerD(1)), 35 | missing: optional(string), 36 | precision: optional(floatD(0.01)), 37 | }, 38 | requestDoc: { 39 | exclude: termsExcludeDoc, 40 | field, 41 | include: termsIncludeDoc, 42 | max_doc_count: 'The maximum number of documents a term should appear in.', 43 | missing, 44 | precision: 'The precision of the internal CuckooFilters. Smaller precision leads to better approximation, but higher memory usage. Cannot be smaller than 0.00001. 
Default 0.01', 45 | }, 46 | response, 47 | subAggs: true, 48 | tag: 'bucketing', 49 | version: '7.9', 50 | }; 51 | 52 | // [1] params table at: https://www.elastic.co/guide/en/elasticsearch/reference/7.9/search-aggregations-bucket-rare-terms-aggregation.html#_syntax 53 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/elementary/Select.svelte: -------------------------------------------------------------------------------- 1 | 29 | 30 |
      31 | {#each options as opt} 32 | {#if !(opt.disabled && hideDisabled) || opt.value === selectedOption} 33 |
    • handleClick(opt.value, opt.disabled)} 38 | class='clickable' 39 | > 40 | 41 | {opt.text} 42 | 43 |
    • 44 | {/if} 45 | {/each} 46 |
    47 | 48 | 82 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/explore/route.js: -------------------------------------------------------------------------------- 1 | import {createMachina} from '../utils.js'; 2 | 3 | import {createHistoryStores} from './history.context.js'; 4 | import {historyOptions} from './history.options.js'; 5 | import {createResultsStores} from './results.context.js'; 6 | import {resultsOptions} from './results.options.js'; 7 | import {exploreConfig} from './route.config.js'; 8 | import {createRouteStores} from './route.context.js'; 9 | import {routeOptions} from './route.options.js'; 10 | import {createSelectionStores} from './selecting.context.js'; 11 | import {selectingOptions} from './selecting.options.js'; 12 | import {createSearchStores} from './search.context.js'; 13 | import {searchOptions} from './search.options.js'; 14 | 15 | /* TODO 16 | // nm/svizzle/utils/array-[array-object].js: makeMergeBy 17 | // nm/app/machines/utils: mergeMachines 18 | 19 | const mergeMachines = makeMergeBy(['actions', 'guards', 'services']); 20 | mergeMachines([ 21 | historyOptions, 22 | resultsOptions, 23 | routeOptions, 24 | selectingOptions 25 | ]) 26 | */ 27 | export const exploreOptions = { 28 | actions: { 29 | ...historyOptions.actions, 30 | ...resultsOptions.actions, 31 | ...routeOptions.actions, 32 | ...selectingOptions.actions, 33 | ...searchOptions.actions, 34 | }, 35 | guards: { 36 | ...historyOptions.guards, 37 | ...resultsOptions.guards, 38 | ...routeOptions.guards, 39 | ...selectingOptions.guards, 40 | ...searchOptions.guards, 41 | }, 42 | services: { 43 | ...resultsOptions.services, 44 | ...searchOptions.services, 45 | } 46 | }; 47 | 48 | export const contextStores = { 49 | ...createHistoryStores(), 50 | ...createResultsStores(), 51 | ...createRouteStores(), 52 | ...createSelectionStores(), 53 | ...createSearchStores(), 54 | }; 55 | 56 | export const createExploreMachine = () => 
createMachina( 57 | exploreConfig, 58 | exploreOptions, 59 | contextStores 60 | ); 61 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/config/aggsLabels.js: -------------------------------------------------------------------------------- 1 | export const metricLabels = { 2 | // boxplot: 'BoxPlot', 3 | // scripted_metric: 'Scripted Metric', 4 | // string_stats: 'String Statistics', 5 | // string_stats: 'String Stats' // >= 7.6 6 | avg: 'Average', 7 | cardinality: 'Cardinality', 8 | extended_stats: 'Extended Statistics', 9 | geo_bounds: 'Geographic Bounds', 10 | geo_centroid: 'Geographic Centroid', 11 | max: 'Maximum', 12 | median_absolute_deviation: 'Median Abs. Dev.', // >= 6.6 13 | min: 'Minimum', 14 | percentile_ranks: 'Percentile Ranks', 15 | percentiles: 'Percentiles', 16 | stats: 'Statistics', 17 | sum: 'Sum', 18 | top_hits: 'Top Hits', 19 | value_count: 'Value Count' 20 | } 21 | 22 | export const metricMultiFieldLabels = { 23 | // matrix_stats: 'Matrix Stats', 24 | weighted_avg: 'Weighted Average', 25 | } 26 | 27 | export const topBucketLabels = { 28 | // global: 'Global' 29 | } 30 | 31 | export const bucketLabels = { 32 | // auto_date_histogram: 'Auto Date Histogram' // >= 6.5 33 | // children: 'Children', 34 | auto_date_histogram: 'Auto Date Histogram', 35 | date_histogram: 'Date Histogram', 36 | date_range: 'Date Range', 37 | geo_distance: 'Geo Distance', 38 | geohash_grid: 'Geo Hash', 39 | histogram: 'Histogram', 40 | // ip_range: 'IP Range', 41 | missing: 'Missing', 42 | nested: 'Nested', 43 | range: 'Range', 44 | rare_terms: 'Rare Terms', 45 | significant_terms: 'Significant Terms', 46 | significant_text: 'Significant Text', 47 | terms: 'Terms', 48 | } 49 | 50 | // needs to have sub-aggs to work 51 | export const bucketParentLabels = { 52 | sampler: 'Sampler', 53 | } 54 | export const bucketMultiFieldLabels = { 55 | // composite: 'Composite', 56 | // filter: 'Filter', 57 | // filters: 
'Filters', 58 | } 59 | 60 | export const nestedBucketLabels = { 61 | diversified_sampler: 'Diversified Sampler', 62 | reverse_nested: 'Reverse Nested', 63 | } 64 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/explore/route.options.js: -------------------------------------------------------------------------------- 1 | import {actions, assign} from 'xstate'; 2 | 3 | import {stringifyContextStores} from '$lib/app/machines/utils.js'; 4 | import { 5 | resetSources, 6 | selectSource, 7 | selectDataset, 8 | } from '$lib/app/stores/exploreStores.js'; 9 | import {getDatasetOf} from '$lib/app/utils/data.js'; 10 | import {getBeSearchURL} from '$lib/utils/specs.js'; 11 | 12 | const {choose, log} = actions; 13 | 14 | const setDataset = (ctx, {project, source, version}) => { 15 | ctx.dataset.set({project, source, version}); 16 | 17 | return ctx; 18 | } 19 | 20 | const setNeededFields = (ctx, {neededFields}) => { 21 | ctx._neededFields.set(neededFields?.split(',') || []); 22 | 23 | return ctx; 24 | } 25 | 26 | const setURL = (ctx, {project, source, version}) => { 27 | const dataset = getDatasetOf({project, source, version}); 28 | const queryURL = getBeSearchURL(dataset); 29 | ctx.queryURL.set(queryURL); 30 | 31 | return ctx; 32 | } 33 | 34 | const loggedEvents = [ 35 | // 'xstate.init', 36 | // 'QUERY_UPDATED', 37 | // 'SEARCH_QUERY_UPDATED' 38 | ]; 39 | 40 | export const routeOptions = { 41 | actions: { 42 | conditionalLog: choose([ 43 | { 44 | cond: 'logGuard', 45 | actions: [ 46 | log( 47 | (context, event) => `---- context: ${stringifyContextStores(context)}\n\n---- event: ${JSON.stringify(event)}`, 48 | 'conditionalLog' 49 | ) 50 | ] 51 | }, 52 | ]), 53 | setDataset: assign(setDataset), 54 | setNeededFields: assign(setNeededFields), 55 | setURL: assign(setURL), 56 | resetSources: () => { 57 | resetSources() 58 | }, 59 | selectDataset: (ctx, {project, source, version}) => { 60 | selectDataset({project, source, 
// fe/src/lib/elasticsearch/aggs/test/check_exports.spec.js
import assert from 'node:assert';
import path from 'node:path';
import {fileURLToPath} from 'node:url';

import * as _ from 'lamb';
import {readDir, readFile} from '@svizzle/file';
import {
	isIterableNotEmpty,
	makeEndsWith,
	makePrefixed,
	makeStartsWith,
	sliceStringAt,
} from '@svizzle/utils';

const __dirname = path.dirname(fileURLToPath(import.meta.url));

const SRC_DIR_PATH = path.resolve(__dirname, '../spec/');
const INDEX_PATH = path.resolve(SRC_DIR_PATH, 'index.js');

// Captures the module name of each `export {default as X} from './name.js';`.
// Anchored on end-of-line (`m` flag) rather than on a literal `\n`, so that a
// final export without a trailing newline is still detected.
const REGEX = /export \{default as .*\} from '\.\/(.*)\.js';\r?$/ugm;

/* Modules in the spec dir that are expected to be re-exported by index.js */
const isAggModule = _.allOf([
	makeEndsWith('.js'),
	_.not(makeStartsWith('_')),
	_.not(makeStartsWith('index')),
	_.not(makeEndsWith('.todo.js')),
	_.not(makeEndsWith('.spec.js')),
]);

describe('elasticsearch/aggs/spec/: exports', function () {
	it('All agg valid modules are exported', async function () {
		// No `.catch` here: a failure to read the directory or the index must
		// fail the test instead of being logged and treated as "nothing missing".
		const [modules, exported] = await Promise.all([
			readDir(SRC_DIR_PATH).then(_.pipe([
				_.filterWith(isAggModule),
				_.mapWith(sliceStringAt([0, -3])),
				_.sortWith([])
			])),

			readFile(INDEX_PATH, 'utf-8')
			.then(srcIndex => [...srcIndex.matchAll(REGEX)].map(_.getAt(1)))
			.then(_.sortWith([])),
		]);

		// Module filenames that index.js does not export.
		const notExported = _.pullFrom(modules, exported);

		if (isIterableNotEmpty(notExported)) {
			const list = notExported.map(makePrefixed('- ')).join('\n');
			console.log(`\n======================\n✋`)
			console.log(`elasticsearch/aggs/spec/index.js not exporting modules with these filenames:\n\n${list}`)
			console.log(`======================\n`)
		}

		assert.deepStrictEqual(notExported, []);
	});
});
The user can define a point of origin and a set of distance range buckets.', 23 | fieldType: unionOf(geoPoint, arrayOf(geoPoint)), 24 | label: 'Geo Distance', 25 | request: { // [0] 26 | distance_type: enumsOf(['arc', 'plane'], 'arc'), 27 | field: string, 28 | keyed: optional(booleanD(false)), 29 | origin: string, 30 | ranges: arrayOf(objectOf({ 31 | key: optional(string), 32 | __extent: someOf({ 33 | from: number, 34 | to: number, 35 | }), 36 | })), 37 | unit: enumsOf(['m', 'mi', 'in', 'yd', 'km', 'cm', 'mm'], 'm'), // TODO make a type 38 | }, 39 | requestDoc: { 40 | distance_type: 'The distance calculation type can be set using the distance_type parameter.', 41 | field, 42 | keyed, 43 | origin: 'The `origin` point can accept all formats supported by the `geo_point` type.', 44 | ranges: 'Array of objects of shape {from: string, to: string}', 45 | unit: 'By default, the distance unit is m (meters) but it can also accept: mi (miles), in (inches), yd (yards), km (kilometers), cm (centimeters), mm (millimeters).', 46 | }, 47 | response, 48 | subAggs: true, 49 | tag: 'bucketing', 50 | version: '7.9', 51 | }; 52 | 53 | // [0] 7.9: no params table 54 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/utils.js: -------------------------------------------------------------------------------- 1 | import {interpret, Machine} from 'xstate'; 2 | import {get, writable} from 'svelte/store'; 3 | import * as _ from 'lamb'; 4 | import {stringify} from '@svizzle/utils'; 5 | 6 | export function bindToStore (interpreter) { 7 | const machineStore = writable(interpreter.initialState, () => { 8 | // service.start(); 9 | // Unsubscribing here works for machines subcribed to only once, 10 | // but can wreak havoc if subcsriptions are changiong all the time. 11 | // TODO Perhaps implement reference counting or remove if intended 12 | // for more general use. 
13 | // return () => service.stop(); 14 | }); 15 | 16 | interpreter.onTransition(nextState => { 17 | machineStore.set(nextState); 18 | }); 19 | 20 | return { 21 | subscribe: machineStore.subscribe, 22 | send: interpreter.send, 23 | }; 24 | } 25 | 26 | /** 27 | * Creates an xstate machine and places it into a svelte store. 28 | * Also creates a svelte store to be passed to the machine as context 29 | * 30 | * @param machineConfig - A valid xstate configuration object 31 | * @param machineOptions - A valid xstate options object: guards, actions, services, activities 32 | * @param contextStores - The default store(s) to be passed to xstate as context 33 | * @returns An object containing two objects. One being a store containing the xstate machine, the other containing the stores passed in as context. 34 | */ 35 | export function createMachina (machineConfig, machineOptions, contextStores) { 36 | const machine = Machine({ 37 | ...machineConfig, 38 | context: contextStores 39 | }, machineOptions); 40 | 41 | // const interpreter = interpret(machine, { devTools: process.env.INSPECT === 'true' }); 42 | const interpreter = interpret(machine, { devTools: false }); 43 | 44 | interpreter.start(); 45 | 46 | return { 47 | machine: bindToStore(interpreter), 48 | contextStores 49 | }; 50 | } 51 | 52 | 53 | export const stringifyContextStores = _.pipe([_.mapValuesWith(get), stringify]); 54 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/percentiles.js: -------------------------------------------------------------------------------- 1 | import {esNumericButBoolean} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 2 | import {optional, script} from '$lib/elasticsearch/types/params.js'; 3 | import { 4 | arrayOf, 5 | booleanD, 6 | float, 7 | integer, 8 | integerD, 9 | objectOf, 10 | string, 11 | number 12 | } from '$lib/types/index.js'; 13 | import { 14 | field, 15 | keyed, 16 | missing, 17 | script as scriptDoc, 
18 | } from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 19 | import response from '$lib/elasticsearch/aggs/response/numToNum.js'; 20 | 21 | export default { 22 | id: 'percentiles', 23 | availability: { 24 | from: '1.3' 25 | }, 26 | docPath: '/search-aggregations-metrics-percentile-aggregation.html', 27 | docs: 'Calculates one or more percentiles over numeric values extracted from the aggregated documents.', 28 | fieldType: esNumericButBoolean, 29 | label: 'Percentiles', 30 | request: { // [0] 31 | field: string, 32 | hdr: optional(objectOf({ 33 | number_of_significant_value_digits: integer 34 | })), 35 | keyed: optional(booleanD(true)), 36 | missing: optional(number), // [1] 37 | percents: optional(arrayOf(float, [1, 5, 25, 50, 75, 95, 99])), 38 | script: optional(script), 39 | tdigest: optional(objectOf({ 40 | compression: integerD(100) 41 | })), 42 | }, 43 | requestDoc: { 44 | field, 45 | hdr: 'hdr object indicates that HDR Histogram should be used to calculate the percentiles and specific settings for this algorithm can be specified inside the object. 
Format: {number_of_significant_value_digits: integer}', 46 | keyed, 47 | missing, 48 | percents: 'List of treshold values to use for ranking.', 49 | script: scriptDoc, 50 | tdigest: 'Format: {compression: integer}: The compression parameter limits the maximum number of nodes to 20 * compression.', 51 | }, 52 | response, 53 | tag: 'metric', 54 | version: '7.9', 55 | }; 56 | 57 | // [0] 7.9: no params table 58 | // [1] TODO add constraint to be same type as the field type 59 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/explore/results.config.js: -------------------------------------------------------------------------------- 1 | export const resultsConfig = { 2 | id: 'Results', 3 | initial: 'Idle', 4 | on: { 5 | AGGS_HIERARCHY_UPDATED: { 6 | target: '#Results.CheckMatching' 7 | } 8 | }, 9 | states: { 10 | Idle: { 11 | }, 12 | CheckMatching: { 13 | entry: ['conditionalLog'], 14 | always: [ 15 | { 16 | target: 'Matching', 17 | cond: 'isMatching' 18 | }, 19 | { target: 'Dirty' } 20 | ] 21 | }, 22 | Matching: { 23 | id: 'Matching', 24 | entry: [ 25 | 'conditionalLog', 26 | 'sendResultsUpdated' 27 | ], 28 | }, 29 | Dirty: { 30 | id: 'Dirty', 31 | initial: 'CheckingCache', 32 | entry: ['conditionalLog'], 33 | states: { 34 | CheckingCache: { 35 | entry: [ 36 | 'conditionalLog', 37 | 'setCacheKey', 38 | ], 39 | always: [ 40 | { 41 | target: '#Matching', 42 | cond: 'isInCache', 43 | actions: ['loadFromCache'] 44 | }, 45 | { 46 | target: 'UpdatingQueue', 47 | actions: ['initQueue'] 48 | } 49 | ] 50 | }, 51 | UpdatingQueue: { 52 | entry: [ 53 | 'conditionalLog', 54 | 'updateQueue', 55 | ], 56 | always: [ 57 | { 58 | target: 'Pending', 59 | cond: 'hasQueuedAggs', 60 | }, 61 | { 62 | target: '#Matching', 63 | actions: [ 64 | 'updateCache', 65 | 'updateCurrentResults', 66 | 'resetQueue', 67 | ] 68 | }, 69 | ] 70 | }, 71 | Pending: { 72 | entry: ['conditionalLog'], 73 | invoke: { 74 | id: 'request', 75 | src: 
'apiRequest', 76 | onDone: { 77 | target: 'UpdatingQueue', 78 | actions: [ 79 | 'updateQueueResults', 80 | ] 81 | }, 82 | onError: { 83 | target: 'Error', 84 | } 85 | } 86 | }, 87 | Error: { 88 | id: 'Error', 89 | entry: [ 90 | 'conditionalLog', 91 | 'getError' 92 | ], 93 | } 94 | } 95 | } 96 | } 97 | }; 98 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/builder/formediting.config.js: -------------------------------------------------------------------------------- 1 | export const formEditingConfig = { 2 | initial: 'Idle', 3 | states: { 4 | Idle: { 5 | on: { 6 | AUTO_EXEC_TOGGLED: { 7 | target: 'Idle', 8 | actions: ['toggleAutoExecute','log'] 9 | }, 10 | DATASET_CHANGED: { 11 | target: 'Idle', 12 | actions: ['setDataset'] 13 | }, 14 | FORM_ADDED: { 15 | target: 'Idle', 16 | actions: ['spawnNestedForm'] 17 | }, 18 | SELECTION_COMPLETED: { 19 | target: 'Idle', 20 | actions: ['configureSubforms'] 21 | }, 22 | SELECTION_INCOMPLETE: { 23 | target: 'Idle', 24 | actions: ['deleteNestedForms'] 25 | }, 26 | FORM_SELECTED: { 27 | target: 'Idle', 28 | actions: [ 29 | 'selectForm', 30 | 'sendEdited' 31 | ] 32 | }, 33 | HIDE_DISABLED_FORMS_TOGGLED: { 34 | target: 'Idle', 35 | actions: ['toggleHideDisabledForms'] 36 | }, 37 | HIDE_DISABLED_AGGS_TOGGLED: { 38 | target: 'Idle', 39 | actions: ['toggleHideDisabledAggs'] 40 | }, 41 | HIDE_DISABLED_DSETS_TOGGLED: { 42 | target: 'Idle', 43 | actions: ['toggleHideDisabledDatasets'] 44 | }, 45 | HIDE_DISABLED_FIELDS_TOGGLED: { 46 | target: 'Idle', 47 | actions: ['toggleHideDisabledFields'] 48 | }, 49 | REQUEST_TAB_SELECTED: { 50 | target: 'Idle', 51 | actions: ['setTab'] 52 | }, 53 | RESULT_SIZE_CHANGED: { 54 | target: 'Idle', 55 | actions: ['setResultSize'] 56 | }, 57 | SHOW_FULL_RESPONSE_TOGGLED: { 58 | target: 'Idle', 59 | actions: ['toggleShowFullResponse'] 60 | }, 61 | TYPINGS_CHANGED: { 62 | target: 'Idle', 63 | actions: ['setTypings'] 64 | }, 65 | PARSING_START: { 66 | 
target: 'Idle', 67 | actions: ['setParsing'] 68 | }, 69 | PARSING_DONE: { 70 | target: 'Idle', 71 | actions: ['setParsingDone'] 72 | }, 73 | FORM_CHANGED: { 74 | target: 'Idle', 75 | actions: ['sendTreeChanged'] 76 | } 77 | } 78 | } 79 | } 80 | }; 81 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/geohash_grid.js: -------------------------------------------------------------------------------- 1 | import {geoBounds, geoPointObject} from '$lib/elasticsearch/types/fields.js'; 2 | import {optional} from '$lib/elasticsearch/types/params.js'; 3 | import { 4 | integer, 5 | integerD, 6 | intWithin, 7 | string 8 | } from '$lib/types/index.js'; 9 | import {field} from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 10 | import response from '$lib/elasticsearch/aggs/response/bucketsGeohashGrid.js'; 11 | 12 | export default { 13 | id: 'geohash_grid', 14 | availability: { 15 | from: '1.3' 16 | }, 17 | docPath: '/search-aggregations-bucket-geohashgrid-aggregation.html', 18 | docs: 'A multi-bucket aggregation that works on `geo_point` fields and groups points into buckets that represent cells in a grid. The resulting grid can be sparse and only contains cells that have matching data.', 19 | fieldType: geoPointObject, 20 | label: 'Geohash Grid', 21 | request: { // [1] 22 | field: string, 23 | bounds: optional(geoBounds), 24 | precision: optional(intWithin([0, 12], 5)), 25 | shard_size: optional(integer), // [2] 26 | size: optional(integerD(10000)), 27 | }, 28 | requestDoc: { 29 | field, 30 | bounds: 'The bounding box to filter the points in the bucket.', 31 | precision: 'The string length of the geohashes used to define cells/buckets in the results. Defaults to 5.', 32 | shard_size: 'To allow for more accurate counting of the top cells returned in the final result the aggregation defaults to returning max(10,(size x number-of-shards)) buckets from each shard. 
If this heuristic is undesirable, the number considered from each shard can be over-ridden using this parameter.', 33 | size: 'The maximum number of geohash buckets to return (defaults to 10,000).', 34 | }, 35 | response, 36 | subAggs: true, 37 | tag: 'bucketing', 38 | version: '7.9', 39 | }; 40 | 41 | // [1] params table: https://www.elastic.co/guide/en/elasticsearch/reference/7.10/search-aggregations-bucket-geohashgrid-aggregation.html#_options_3 42 | // [2] ES default is -> max(10,(size x number-of-shards)) buckets from each shard. 43 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/builder/history.options.js: -------------------------------------------------------------------------------- 1 | import * as _ from 'lamb'; 2 | import { isObjNotEmpty, isNotNil } from '@svizzle/utils'; 3 | import { get } from 'svelte/store'; 4 | import { RISON } from 'rison2'; 5 | 6 | const pickNonNil = _.pickIf(isNotNil); 7 | 8 | function formData (machine) { 9 | const data = { 10 | name: machine.context.id, 11 | } 12 | const selection = get(machine.context.selection); 13 | 14 | if (selection && isObjNotEmpty(selection)) { 15 | const result = pickNonNil(selection); 16 | isObjNotEmpty(result) && (data.selection = result); 17 | } 18 | const params = get(machine.context.params); 19 | params && isObjNotEmpty(params) && (data.params = params); 20 | return data; 21 | } 22 | 23 | function collectConfiguration (ctx) { 24 | const config = { 25 | size: get(ctx.resultSize), 26 | form: get(get(ctx.selectedForm).machine).context.id, 27 | forms: get(ctx.forms).map(form => formData(get(form.machine))), 28 | } 29 | 30 | const dataset = get(ctx.dataset); 31 | dataset && (config.dataset = dataset); 32 | return config; 33 | } 34 | 35 | function updateEntry (ctx, event) { 36 | if (get(ctx.isParsing)) { 37 | return; 38 | } 39 | const query = RISON.stringify(collectConfiguration(ctx)); 40 | const url = `${window.location.pathname}?q=${query}`; 41 
| let updateType = event.init 42 | ? 'pushState' 43 | : 'replaceState'; 44 | if (url === ctx.lastGoodURL) { 45 | updateType = 'replaceState'; 46 | } 47 | history[updateType](null, window.title, url); 48 | } 49 | 50 | export const historyOptions = { 51 | actions: { 52 | /** 53 | * When editing starts reserves a history slot for the session 54 | */ 55 | initEntry: ctx => updateEntry(ctx, {init: true}), 56 | /** 57 | * Updates the history slot on edits 58 | */ 59 | updateEntry, 60 | commitLastGoodURL: ctx => { 61 | ctx.lastGoodURL = location.pathname + location.search; 62 | } 63 | }, 64 | guards: { 65 | isActiveForm: (ctx, event) => { 66 | const selectedForm = get(ctx.selectedForm); 67 | return selectedForm && selectedForm.id === event.formId; 68 | } 69 | } 70 | }; 71 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/weighted_avg.js: -------------------------------------------------------------------------------- 1 | import {esNumericButBoolean} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 2 | import {optional, script} from '$lib/elasticsearch/types/params.js'; 3 | import { 4 | object, 5 | objectOf, 6 | string, 7 | number 8 | } from '$lib/types/index.js'; 9 | import {missing, script as scriptDoc} from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 10 | import response from '$lib/elasticsearch/aggs/response/value.js'; 11 | 12 | export default { 13 | id: 'weighted_avg', 14 | availability: { 15 | from: '6.4' 16 | }, 17 | docPath: '/search-aggregations-metrics-weight-avg-aggregation.html', 18 | docs: 'Computes the weighted average of numeric values that are extracted from the aggregated documents.', 19 | fieldType: esNumericButBoolean, 20 | label: 'Weighted Average', 21 | request: { // [0] 22 | format: optional(object), 23 | value_type: optional(object), 24 | value: objectOf({ 25 | field: string, 26 | missing: optional(number), // [1] 27 | script: optional(script), 28 | }), 29 | weight: objectOf({ 30 
| field: string, 31 | missing: optional(number), // [1] 32 | script: optional(script), 33 | }), 34 | }, 35 | requestDoc: { 36 | format: 'The numeric response formatter.', 37 | value_type: 'A hint about the values for pure scripts or unmapped fields.', 38 | value: 'The configuration for the field or script that provides the values', 39 | 'value.field': 'The field that values should be extracted from.', 40 | 'value.missing': missing, 41 | 'value.script': scriptDoc, 42 | weight: 'The configuration for the field or script that provides the weights', 43 | 'weight.field': 'The field that values should be extracted from.', 44 | 'weight.missing': missing, 45 | 'weight.script': scriptDoc, 46 | }, 47 | response, 48 | tag: 'metric', 49 | version: '7.9', 50 | }; 51 | 52 | // [0] 7.9: params table at https://www.elastic.co/guide/en/elasticsearch/reference/7.9/search-aggregations-metrics-weight-avg-aggregation.html#search-aggregations-metrics-weight-avg-aggregation 53 | // [1] TODO add constraint to be same type as the field type 54 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/svizzle/InputWidget.svelte: -------------------------------------------------------------------------------- 1 | 39 | 40 |
    41 |
    42 | 50 | {#if hasButton} 51 | 57 | {/if} 58 |
    59 |
    60 | 61 | 88 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/t_test.js: -------------------------------------------------------------------------------- 1 | import {esNumeric} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 2 | import {optional, script} from '$lib/elasticsearch/types/params.js'; 3 | import { 4 | enumsOf, 5 | object, 6 | objectOf, 7 | string 8 | } from '$lib/types/index.js'; 9 | import response from '$lib/elasticsearch/aggs/response/value.js'; 10 | 11 | export default { 12 | id: 't_test', 13 | availability: { 14 | from: '7.8' 15 | }, 16 | docPath: '/search-aggregations-metrics-ttest-aggregation.html', 17 | docs: 'Performs a statistical hypothesis test in which the test statistic follows a Student’s t-distribution under the null hypothesis on numeric values.', 18 | fieldType: esNumeric, 19 | label: 'T-test', 20 | request: { // [0] 21 | a: objectOf({ 22 | field: string, 23 | filter: optional(object), 24 | script: optional(script), 25 | }), 26 | b: objectOf({ 27 | field: string, 28 | filter: optional(object), 29 | script: optional(script), 30 | }), 31 | type: enumsOf(['paired', 'homoscedastic', 'heteroscedastic']) 32 | }, 33 | requestDoc: { 34 | a: 'First test.', 35 | 'a.field': 'Field of numeric type for the first test.', 36 | 'a.filter': 'Filter for the first test.', 37 | 'a.script': 'Script for the first test.', 38 | b: 'Second test.', 39 | 'b.field': 'First field of numeric type for the second test.', 40 | 'b.filter': 'Filter for the second test.', 41 | 'c.script': 'Script for the first test.', 42 | type: 'The type of the test can be specified using the type parameter.', 43 | }, 44 | response: { 45 | ...response, 46 | doc: { 47 | value: 'The probability value for the test.' 
48 | }, 49 | docLong: { 50 | value: 'It is the probability of obtaining results at least as extreme as the result processed by the aggregation, assuming that the null hypothesis is correct (which means there is no difference between population means). Smaller p-value means the null hypothesis is more likely to be incorrect and population means are indeed different.' 51 | } 52 | }, 53 | tag: 'metric', 54 | version: '7.9', 55 | }; 56 | 57 | // [0] 7.9: no params table 58 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/index.js: -------------------------------------------------------------------------------- 1 | export {default as auto_date_histogram} from './auto_date_histogram.js'; 2 | export {default as avg} from './avg.js'; 3 | export {default as boxplot} from './boxplot.js'; 4 | export {default as cardinality} from './cardinality.js'; 5 | export {default as date_histogram} from './date_histogram.js'; 6 | export {default as date_range} from './date_range.js'; 7 | export {default as extended_stats} from './extended_stats.js'; 8 | export {default as geo_bounds} from './geo_bounds.js'; 9 | export {default as geo_centroid} from './geo_centroid.js'; 10 | export {default as geo_distance} from './geo_distance.js'; 11 | export {default as geohash_grid} from './geohash_grid.js'; 12 | export {default as geotile_grid} from './geotile_grid.js'; 13 | export {default as histogram} from './histogram.js'; 14 | export {default as max} from './max.js'; 15 | export {default as median_absolute_deviation} from './median_absolute_deviation.js'; 16 | export {default as min} from './min.js'; 17 | export {default as missing} from './missing.js'; 18 | export {default as percentile_ranks} from './percentile_ranks.js'; 19 | export {default as percentiles} from './percentiles.js'; 20 | export {default as range} from './range.js'; 21 | export {default as rare_terms} from './rare_terms.js'; 22 | export {default as rate} from 
'./rate.js'; 23 | export {default as scripted_metric} from './scripted_metric.js'; 24 | export {default as significant_terms} from './significant_terms.js'; 25 | export {default as significant_text} from './significant_text.js'; 26 | export {default as stats} from './stats.js'; 27 | export {default as string_stats} from './string_stats.js'; 28 | export {default as sum} from './sum.js'; 29 | export {default as t_test} from './t_test.js'; 30 | export {default as terms} from './terms.js'; 31 | export {default as top_hits} from './top_hits.js'; 32 | export {default as top_metrics} from './top_metrics.js'; 33 | export {default as value_count} from './value_count.js'; 34 | export {default as variable_width_histogram} from './variable_width_histogram.js'; 35 | export {default as weighted_avg} from './weighted_avg.js'; 36 | -------------------------------------------------------------------------------- /fe/src/lib/app/machines/builder/history.test.md: -------------------------------------------------------------------------------- 1 | # URL sequences for testing history management 2 | 3 | 1. 4 | - / 5 | - /builder 6 | - /builder?q=(dataset:mosaic_nih_v7,form:primary,forms:!((name:primary,params:(interval:4),selection:(aggregation:histogram,field:year_fiscal_funding,type:integer)),(name:secondary)),size:0) 7 | - BACK: 8 | - /builder 9 | 10 | 2. 11 | - /builder 12 | - /builder?q=(dataset:mosaic_nih_v5,form:primary,forms:!((name:primary,selection:(aggregation:histogram,type:integer))),size:0) 13 | - BACK: 14 | - /builder 15 | - FWD: 16 | - /builder?q=(dataset:mosaic_nih_v5,form:primary,forms:!((name:primary,selection:(aggregation:histogram,type:integer))),size:0) 17 | - Click on a field: 18 | - /builder?q=(dataset:mosaic_nih_v5,form:primary,forms:!((name:primary,selection:(aggregation:histogram,field:year_fiscal_funding,type:integer)),(name:secondary)),size:0) 19 | - BACK: 20 | - /builder 21 | 22 | 3. 
23 | - /builder?q=(dataset:mosaic_nih_v7,form:primary,forms:!((name:primary,selection:(aggregation:histogram,field:year_fiscal_funding,type:integer)),(name:secondary)),size:0) 24 | - /builder?q=(dataset:mosaic_nih_v7,form:primary,forms:!((name:primary,selection:(aggregation:reverse_nested,field:year_fiscal_funding,type:integer)),(name:secondary)),size:0) 25 | - /builder?q=(dataset:mosaic_nih_v5,form:primary,forms:!((name:primary,selection:(aggregation:reverse_nested,field:year_fiscal_funding,type:integer)),(name:secondary)),size:0) 26 | - /builder?q=(dataset:mosaic_nih_v5,form:primary,forms:!((name:primary,selection:(aggregation:reverse_nested,field:cost_total_project,type:integer)),(name:secondary)),size:0) 27 | 28 | 4. 29 | - /builder 30 | - /builder?q=(dataset:mosaic_nih_v7,form:primary,forms:!((name:primary,selection:(aggregation:extended_stats,field:cost_total_project))),size:0) 31 | - Select second axis 32 | - Select first axis 33 | - Select second axis 34 | - Select first axis 35 | - Select second axis 36 | - BACK: /builder?q=(dataset:mosaic_nih_v7,form:primary,forms:!((name:primary,selection:(aggregation:extended_stats,field:cost_total_project))),size:0) 37 | - BACK: /builder -------------------------------------------------------------------------------- /fe/src/lib/utils/specs.js: -------------------------------------------------------------------------------- 1 | /* 2 | This file is imported by `src/bin/make_data.js` 3 | - we can't import files generated by it like `data/datasets.json` in here 4 | - we can't import @svizzle/ui as its index exports `.svelte` files 5 | */ 6 | 7 | import { 8 | isObject, 9 | isString, 10 | makePostfixed, 11 | applyFnMap 12 | } from '@svizzle/utils'; 13 | import * as _ from 'lamb'; 14 | 15 | import {useCache, selectedCacheURL} from '$lib/env.js'; 16 | 17 | export const getSource = _.getKey('source'); 18 | export const getSpecVersion = _.getKey('version'); 19 | export const getApiVersion = 
_.getPath('spec.dataset.api_version'); 20 | export const getSchema = _.getPath('spec.dataset.schema'); 21 | export const getEsEndpointURL = _.getPath('spec.dataset.endpoint_url'); 22 | export const getEsSearchURL = _.pipe([getEsEndpointURL, makePostfixed('/_search')]); 23 | export const getBeEndpointURL = useCache 24 | // FIXME import `es` from shared configuration with BE 25 | ? spec => `${selectedCacheURL}/es/${_.getPathIn(spec, 'spec.dataset.endpoint_url')}` 26 | : getEsEndpointURL; 27 | export const getBeCoverageEndpointURL = spec => 28 | `${selectedCacheURL}/coverage/${_.getPathIn(spec, 'spec.dataset.endpoint_url')}` 29 | export const getBeSearchURL = _.pipe([getBeEndpointURL, makePostfixed('/_search')]); 30 | export const getFieldTypeId = _.getKey('type'); 31 | export const getESType = _.adapter([ 32 | _.casus(isObject, getFieldTypeId), 33 | _.casus(isString, _.identity), 34 | _.always('unknown') 35 | ]); 36 | 37 | export const getDocCount = _.getKey('doc_count'); 38 | export const getLocation = _.getKey('location'); 39 | 40 | // e.g. 
`arxlive_arxiv_v3` 41 | export const getDatasetIdOf = 42 | ({project, source, version}) => `${project}_${source}_v${version}`; 43 | 44 | export const groupBySource = _.groupBy(getSource); 45 | 46 | export const makeDatasetBySource = _.pipe([ 47 | _.groupBy(getSource), 48 | _.mapValuesWith(_.sortWith([_.getKey('project'), getSpecVersion])), 49 | _.values, 50 | _.sortWith([getSource]), 51 | _.mapWith(applyFnMap({ 52 | source: _.getPath('0.source'), 53 | releases: _.sortWith([getSpecVersion]) 54 | })) 55 | ]); 56 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/diversified_sampler.todo.js: -------------------------------------------------------------------------------- 1 | import {esSearchableField} from '$lib/elasticsearch/aggs/ref/typeGroups.js'; 2 | import {optional} from '$lib/elasticsearch/types/params.js'; 3 | import {enumsOf, integerD, string} from '$lib/types/index.js'; 4 | import {missing} from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 5 | 6 | export default { 7 | id: 'diversified_sampler', 8 | availability: { 9 | from: '5.0' 10 | }, 11 | docPath: '/search-aggregations-bucket-diversified-sampler-aggregation.html', 12 | docs: 'Like the sampler aggregation this is a filtering aggregation used to limit any sub aggregations processing to a sample of the top-scoring documents. 
The diversified_sampler aggregation adds the ability to limit the number of matches that share a common value such as an "author".', 13 | fieldType: esSearchableField, 14 | label: 'Diversified Sampler', 15 | request: { 16 | execution_hint: optional( 17 | enumsOf(['map', 'global_ordinals', 'bytes_hash'], 'global_ordinals') 18 | ), 19 | field: string, 20 | max_docs_per_value: optional(integerD(1)), 21 | missing: optional(string), // [1] 22 | shard_size: optional(integerD(100)), 23 | }, 24 | requestDoc: { 25 | execution_hint: 'The optional execution_hint setting can influence the management of the values used for de-duplication. Each option will hold up to `shard_size` values in memory while performing de-duplication but the type of value held can be controlled as follows: * hold field values directly (`map`) * hold ordinals of the field as determined by the Lucene index (`global_ordinals`) * hold hashes of the field values - with potential for hash collisions (`bytes_hash`)', 26 | max_docs_per_value: 'The max_docs_per_value is an optional parameter and limits how many documents are permitted per choice of de-duplicating value. The default setting is "1".', 27 | missing, 28 | shard_size: 'The shard_size parameter limits how many top-scoring documents are collected in the sample processed on each shard. The default value is 100.' 29 | }, 30 | subAggs: true, 31 | tag: 'bucketing', 32 | }; 33 | 34 | // [1] TODO add constraint to be same type as the field type 35 | -------------------------------------------------------------------------------- /fe/src/lib/app/components/elementary/TypedField.svelte: -------------------------------------------------------------------------------- 1 | 48 | 49 | 64 |
    68 | {#if editor} 69 | 80 | {/if} 81 |
    82 | 83 | 96 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/variable_width_histogram.js: -------------------------------------------------------------------------------- 1 | import {optional} from '$lib/elasticsearch/types/params.js'; 2 | import {integerD, number, string} from '$lib/types/index.js'; 3 | import {field} from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 4 | import response from '$lib/elasticsearch/aggs/response/bucketsDocCount.js'; 5 | 6 | export default { 7 | id: 'variable_width_histogram', 8 | availability: { 9 | from: '7.9' 10 | }, 11 | docPath: '/search-aggregations-bucket-variablewidthhistogram-aggregation.html', 12 | docs: 'This is a multi-bucket aggregation similar to Histogram. However, the width of each bucket is not specified. Rather, a target number of buckets is provided and bucket intervals are dynamically determined based on the document distribution. Unlike other multi-bucket aggregations, the intervals will not necessarily have a uniform width.', 13 | fieldType: number, 14 | label: 'Auto Histogram', 15 | request: { // [0] 7.9: no params table 16 | buckets: optional(integerD(10)), 17 | field: string, 18 | initial_buffer: optional(integerD(5000)), 19 | shard_size: optional(integerD(500)), 20 | }, 21 | requestDoc: { 22 | buckets: 'The target number of buckets. Bucket intervals are dynamically determined based on the document distribution.', 23 | field, 24 | initial_buffer: 'The number of individual documents that will be stored in memory on a shard before the initial bucketing algorithm is run. Bucket distribution is determined using this sample of initial_buffer documents. 
So, although a higher initial_buffer will use more memory, it will lead to more representative clusters.', 25 | shard_size: 'The number of buckets that the coordinating node will request from each shard.', 26 | }, 27 | response, 28 | subAggs: true, 29 | tag: 'bucketing', 30 | lastChecked: '7.9', 31 | }; 32 | 33 | /* 34 | [0] 7.9: no params table 35 | [1] TODO later (#201): needs a constraint 36 | > This aggregation cannot currently be nested under any aggregation that collects from more than a single bucket. 37 | https://www.elastic.co/guide/en/elasticsearch/reference/7.9/search-aggregations-bucket-variablewidthhistogram-aggregation.html 38 | */ 39 | -------------------------------------------------------------------------------- /specs/indices/types/type_templates.yaml: -------------------------------------------------------------------------------- 1 | # generic type 2 | 3 | : 4 | data_type: string 5 | description: string # OPTIONAL 6 | format: string # OPTIONAL - e.g. YYYYMMDD 7 | kind: string # e.g. density, currency 8 | # constraints 9 | regex: RegExp # OPTIONAL – if `data_type` = `string`, a regular expression that the value must match 10 | range: number[] # OPTIONAL – if `data_type` = `integer`|`float`, min and max acceptable values (included) – e.g. `[0, 1]` 11 | min: number # OPTIONAL – if `data_type` = `integer`|`float`, min acceptable value (included) – e.g. 0 for weight 12 | max: number # OPTIONAL – if `data_type` = `integer`|`float`, max acceptable value (included) – e.g. `700` for visible light wavelength (nm) 13 | values: array # OPTIONAL – if there are a handful of known acceptable values – `e.g. ['brown', 'blue', 'orange']` 14 | 15 | # GEOGRAPHY 16 | 17 | GeoRegion: 18 | data_type: object 19 | id_regex: RegExp # OPTIONAL – a regular expression useful to test ID values 20 | id: string # the region id e.g. UKF2 21 | kind: geographic region 22 | level: int # OPTIONAL – the region level, used for NUTS, e.g. 
2 23 | levels: int[] # OPTIONAL – a list of all the possible levels – e.g. for NUTS: [0, 1, 2, 3] 24 | name: string # name of the region, e.g. Cornwall, France 25 | provider: string # OPTIONAL - use `PROVIDED` in case `provider_url` is unknown because the data have already been reverse geocoded 26 | provider_url: URL # OPTIONAL - url of the boundaries used for reverse geocoding, if known 27 | region_type: string # the region type (e.g. `NUTS`, `LEP`, etc) 28 | year_enforced: int # OPTIONAL - year of spec being enforced 29 | year_spec: int # year of specification release 30 | 31 | # UNITS 32 | 33 | Unit: 34 | data_type: string 35 | description: string # OPTIONAL 36 | kind: string # e.g. density, currency 37 | label: string # short text 38 | unit_latex: string # e.g. \SI{32}{mg.m^{-3}} 39 | unit_string: string # e.g. `mg m^-3` 40 | 41 | Datestring: 42 | data_type: string 43 | format: string # e.g. YYYYMMDD 44 | kind: date 45 | regex: RegExp # OPTIONAL – a regular expression useful to test values 46 | -------------------------------------------------------------------------------- /fe/src/lib/elasticsearch/aggs/spec/histogram.js: -------------------------------------------------------------------------------- 1 | import { 2 | booleanD, 3 | extent, 4 | integer, 5 | integerD, 6 | number, 7 | string, 8 | } from '$lib/types/index.js'; 9 | import {optional, sortOptions} from '$lib/elasticsearch/types/params.js'; 10 | import { 11 | field, 12 | keyed, 13 | min_doc_count, 14 | missing 15 | } from '$lib/elasticsearch/aggs/ref/requestDoc.js'; 16 | import response from '$lib/elasticsearch/aggs/response/bucketsDocCount.js'; 17 | 18 | export default { 19 | id: 'histogram', 20 | availability: { 21 | from: '1.3' 22 | }, 23 | docPath: '/search-aggregations-bucket-histogram-aggregation.html', 24 | docs: 'A multi-bucket values source based aggregation that can be applied on numeric values extracted from the documents.', 25 | fieldType: number, 26 | label: 'Histogram', 27 | request: { 
// [0] 28 | extended_bounds: optional(extent), 29 | field: string, 30 | interval: integer, 31 | keyed: optional(booleanD(false)), 32 | min_doc_count: optional(integerD(1, true)), 33 | missing: optional(number), // [1] 34 | offset: optional(integerD(0)), 35 | order: optional(sortOptions), 36 | }, 37 | requestDoc: { 38 | extended_bounds: 'With extended_bounds setting, you now can "force" the histogram aggregation to start building buckets on a specific min value and also keep on building buckets up to a max value (even if there are no documents anymore). Using extended_bounds only makes sense when `min_doc_count` is 0 (the empty buckets will never be returned if `min_doc_count` is greater than 0).', 39 | field, 40 | interval: 'When the aggregation executes, the selected field of every document will be evaluated and will be rounded down to its closest bucket. Must be a positive decimal.', 41 | keyed, 42 | min_doc_count, 43 | missing, 44 | offset: 'Shifts bucket boundaries. Must be a decimal greater than or equal to 0 and less than interval.', 45 | order: 'The order of the buckets can be customized by setting the order parameter. ', 46 | }, 47 | response, 48 | subAggs: true, 49 | tag: 'bucketing', 50 | version: '7.9', 51 | }; 52 | 53 | // [0] 7.9: no params table 54 | // [1] TODO add constraint to be same type as the field type 55 | --------------------------------------------------------------------------------