├── scripts
└── .gitkeep
├── .idea
├── .name
├── vcs.xml
├── inspectionProfiles
│ └── profiles_settings.xml
├── metacrafter-registry.iml
└── modules.xml
├── .gitignore
├── data
├── datahub
│ └── bg.yaml
├── datatypes
│ ├── any
│ │ ├── datetime
│ │ │ ├── age.yaml
│ │ │ ├── en_month.yaml
│ │ │ ├── time.yaml
│ │ │ ├── en_dayofweek.yaml
│ │ │ ├── en_monthshort.yaml
│ │ │ ├── month.yaml
│ │ │ ├── timerange.yaml
│ │ │ ├── dayofmonth.yaml
│ │ │ ├── monthday.yaml
│ │ │ ├── yearmonth.yaml
│ │ │ ├── year.yaml
│ │ │ ├── dayofweek.yaml
│ │ │ ├── deathday.yaml
│ │ │ ├── quarter.yaml
│ │ │ ├── date.yaml
│ │ │ ├── timezone.yaml
│ │ │ ├── datetime.yaml
│ │ │ ├── utctimezoneoffiset.yaml
│ │ │ ├── ianatimezoneid.yaml
│ │ │ ├── duration.yaml
│ │ │ ├── timespan.yaml
│ │ │ ├── iso8601duration.yaml
│ │ │ ├── unixtime.yaml
│ │ │ └── birthday.yaml
│ │ ├── internet
│ │ │ ├── ipsubnet.yaml
│ │ │ ├── ipaddr.yaml
│ │ │ ├── eui48.yaml
│ │ │ ├── ipv6subnet.yaml
│ │ │ ├── asn.yaml
│ │ │ ├── ipv4.yaml
│ │ │ ├── ipv6.yaml
│ │ │ ├── tld.yaml
│ │ │ ├── ipv4subnet.yaml
│ │ │ └── fqdn.yaml
│ │ ├── values
│ │ │ ├── percentage.yaml
│ │ │ ├── income.yaml
│ │ │ ├── quantity.yaml
│ │ │ ├── price.yaml
│ │ │ ├── money_amount.yaml
│ │ │ ├── cost.yaml
│ │ │ ├── discount.yaml
│ │ │ ├── score.yaml
│ │ │ ├── grossmargin.yaml
│ │ │ ├── numeric.yaml
│ │ │ └── share.yaml
│ │ ├── geo
│ │ │ ├── geopoint.yaml
│ │ │ ├── street.yaml
│ │ │ ├── en_countryname.yaml
│ │ │ ├── iso3166code.yaml
│ │ │ ├── geonamesid.yaml
│ │ │ ├── continent.yaml
│ │ │ ├── wbregion.yaml
│ │ │ ├── longitude.yaml
│ │ │ ├── latitude.yaml
│ │ │ ├── address.yaml
│ │ │ ├── postindex.yaml
│ │ │ ├── what3words.yaml
│ │ │ ├── unm49.yaml
│ │ │ ├── city.yaml
│ │ │ ├── iso6709.yaml
│ │ │ └── countrycode_alpha3.yaml
│ │ ├── persons
│ │ │ ├── password.yaml
│ │ │ ├── gender.yaml
│ │ │ ├── person_midname.yaml
│ │ │ ├── username.yaml
│ │ │ ├── person_fullname.yaml
│ │ │ ├── person_surname.yaml
│ │ │ ├── person_firstname.yaml
│ │ │ ├── nationality.yaml
│ │ │ ├── en_nationality.yaml
│ │ │ ├── passport.yaml
│ │ │ └── jobtitle.yaml
│ │ ├── transport
│ │ │ ├── air
│ │ │ │ ├── airlinename.yaml
│ │ │ │ ├── airlinecallsign.yaml
│ │ │ │ ├── airlinecode.yaml
│ │ │ │ ├── iataairlinecode.yaml
│ │ │ │ ├── airport.yaml
│ │ │ │ ├── flightnumber.yaml
│ │ │ │ └── aircraftnumber.yaml
│ │ │ ├── vehicles
│ │ │ │ └── vehiclenumber.yaml
│ │ │ ├── railway
│ │ │ │ └── uiccode.yaml
│ │ │ └── maritime
│ │ │ │ └── imonumber.yaml
│ │ ├── pii
│ │ │ ├── nhsnumber.yaml
│ │ │ ├── ssn.yaml
│ │ │ └── ninumber.yaml
│ │ ├── finances
│ │ │ ├── bic.yaml
│ │ │ ├── clabe.yaml
│ │ │ ├── ifsc.yaml
│ │ │ ├── aba.yaml
│ │ │ ├── uksortcode.yaml
│ │ │ ├── catransit.yaml
│ │ │ ├── iso4217curname.yaml
│ │ │ ├── iso4217curcode.yaml
│ │ │ ├── currency.yaml
│ │ │ ├── tickersymbol.yaml
│ │ │ ├── swiftcode.yaml
│ │ │ └── iban.yaml
│ │ ├── medical
│ │ │ ├── npi.yaml
│ │ │ ├── upin.yaml
│ │ │ ├── cptcode.yaml
│ │ │ ├── snomedcode.yaml
│ │ │ └── gmcnumber.yaml
│ │ ├── education
│ │ │ ├── ukurn.yaml
│ │ │ ├── ukprn.yaml
│ │ │ ├── studentid.yaml
│ │ │ └── ncesid.yaml
│ │ ├── identifiers
│ │ │ ├── id.yaml
│ │ │ ├── guid.yaml
│ │ │ ├── mongodbid.yaml
│ │ │ ├── uuid.yaml
│ │ │ └── dcid.yaml
│ │ ├── common
│ │ │ ├── category.yaml
│ │ │ ├── name.yaml
│ │ │ ├── hexcolor.yaml
│ │ │ ├── color.yaml
│ │ │ └── genre.yaml
│ │ ├── environment
│ │ │ ├── gridnode.yaml
│ │ │ ├── powerplantid.yaml
│ │ │ ├── waterstation.yaml
│ │ │ └── emissioninventory.yaml
│ │ ├── cryptocurrency
│ │ │ ├── p2wshaddr.yaml
│ │ │ ├── p2wpkhaddr.yaml
│ │ │ ├── wifaddrcomp.yaml
│ │ │ ├── wifaddruncomp.yaml
│ │ │ ├── p2shaddr.yaml
│ │ │ ├── p2pkhaddr.yaml
│ │ │ └── bip32addr.yaml
│ │ ├── cryptography
│ │ │ ├── sha1hash.yaml
│ │ │ ├── sha256hash.yaml
│ │ │ ├── md5hash.yaml
│ │ │ ├── sct.yaml
│ │ │ └── crc32.yaml
│ │ ├── texts
│ │ │ ├── headline.yaml
│ │ │ ├── description.yaml
│ │ │ └── comment.yaml
│ │ ├── shipping
│ │ │ ├── tracknum.yaml
│ │ │ ├── s10upu.yaml
│ │ │ └── unpackaginggroup.yaml
│ │ ├── science
│ │ │ ├── scopusauthoirid.yaml
│ │ │ ├── researcherid.yaml
│ │ │ ├── openalexid.yaml
│ │ │ ├── academicdegree.yaml
│ │ │ └── doiprefix.yaml
│ │ ├── dublincore
│ │ │ ├── dclanguage.yaml
│ │ │ ├── dctitle.yaml
│ │ │ └── dcabstract.yaml
│ │ ├── software
│ │ │ ├── programminglang.yaml
│ │ │ ├── filesize.yaml
│ │ │ ├── imphash.yaml
│ │ │ ├── datasize.yaml
│ │ │ ├── telfhash.yaml
│ │ │ ├── tlsh.yaml
│ │ │ ├── vhash.yaml
│ │ │ ├── ssdeep.yaml
│ │ │ ├── richpeheader.yaml
│ │ │ └── authentihash.yaml
│ │ ├── useraccounts
│ │ │ ├── twitter_username.yaml
│ │ │ ├── skype_username.yaml
│ │ │ ├── github_username.yaml
│ │ │ └── instagram_username.yaml
│ │ ├── companies
│ │ │ ├── iso6523.yaml
│ │ │ ├── en_companyname.yaml
│ │ │ ├── orgname.yaml
│ │ │ └── opencorporatesid.yaml
│ │ ├── telecom
│ │ │ ├── imsi.yaml
│ │ │ ├── imei.yaml
│ │ │ └── msisdn.yaml
│ │ ├── government
│ │ │ ├── legislationname.yaml
│ │ │ └── legislationtype.yaml
│ │ ├── objectids
│ │ │ ├── wikidataid.yaml
│ │ │ ├── openlibraryid.yaml
│ │ │ └── viafid.yaml
│ │ ├── files
│ │ │ └── filename.yaml
│ │ ├── industry
│ │ │ └── isicrev4.yaml
│ │ └── chemistry
│ │ │ ├── unclass.yaml
│ │ │ └── pubchemid.yaml
│ ├── US
│ │ ├── geo
│ │ │ ├── us_city.yaml
│ │ │ ├── us_county.yaml
│ │ │ ├── us_state.yaml
│ │ │ ├── us_fips52.yaml
│ │ │ ├── us_zipcode.yaml
│ │ │ └── us_fips64.yaml
│ │ ├── persons
│ │ │ ├── us_passport.yaml
│ │ │ ├── us_npi.yaml
│ │ │ ├── us_ptin.yaml
│ │ │ ├── us_itin.yaml
│ │ │ ├── us_ssn.yaml
│ │ │ ├── us_ein.yaml
│ │ │ ├── us_atin.yaml
│ │ │ └── us_dea_certificate.yaml
│ │ ├── industry
│ │ │ ├── us_naicscode.yaml
│ │ │ └── us_soc.yaml
│ │ ├── telecom
│ │ │ └── fccid.yaml
│ │ ├── finances
│ │ │ ├── us_bankaccount.yaml
│ │ │ ├── us_aba_routing.yaml
│ │ │ └── us_cusip.yaml
│ │ └── government
│ │ │ └── us_piid.yaml
│ ├── RU
│ │ ├── geo
│ │ │ ├── ru_city.yaml
│ │ │ ├── ru_street.yaml
│ │ │ ├── ru_countryname.yaml
│ │ │ ├── ru_postalcode.yaml
│ │ │ ├── ru_regioncode.yaml
│ │ │ ├── ru_regionname.yaml
│ │ │ ├── ru_mosdistricts.yaml
│ │ │ ├── ru_mosadmareas.yaml
│ │ │ ├── ru_feddistrict.yaml
│ │ │ ├── ru_region.yaml
│ │ │ ├── ru_kadastr.yaml
│ │ │ └── ru_kladr.yaml
│ │ ├── datetime
│ │ │ ├── ru_timerange.yaml
│ │ │ └── ru_dayofweek.yaml
│ │ ├── persons
│ │ │ ├── ru_degree.yaml
│ │ │ ├── ru_midname.yaml
│ │ │ ├── ru_firstname.yaml
│ │ │ ├── ru_surname.yaml
│ │ │ ├── ru_fullname.yaml
│ │ │ ├── ru_workposition.yaml
│ │ │ ├── ru_passport.yaml
│ │ │ ├── ru_intpassport.yaml
│ │ │ └── ru_snils.yaml
│ │ ├── companies
│ │ │ ├── ru_companyname.yaml
│ │ │ ├── ru_okogu.yaml
│ │ │ ├── ru_okopfname.yaml
│ │ │ ├── ru_ikuiko.yaml
│ │ │ ├── ru_ikz.yaml
│ │ │ ├── ru_okopf.yaml
│ │ │ ├── ru_inn.yaml
│ │ │ ├── ru_kpp.yaml
│ │ │ ├── ru_okpo.yaml
│ │ │ ├── ru_ogrn_ogrnip.yaml
│ │ │ └── ru_okfs.yaml
│ │ ├── government
│ │ │ ├── ru_rnfi.yaml
│ │ │ ├── ru_csrcode.yaml
│ │ │ ├── ru_npa.yaml
│ │ │ ├── ru_budgetname.yaml
│ │ │ ├── ru_npakind.yaml
│ │ │ ├── ru_kosgucode.yaml
│ │ │ ├── ru_kvrcode.yaml
│ │ │ ├── ru_budgetcode.yaml
│ │ │ ├── ru_tofkcode.yaml
│ │ │ ├── ru_tofkname.yaml
│ │ │ ├── ru_ppocode.yaml
│ │ │ ├── ru_kvrname.yaml
│ │ │ ├── ru_pponame.yaml
│ │ │ ├── ru_fedgrbs.yaml
│ │ │ └── ru_kbk.yaml
│ │ ├── finances
│ │ │ ├── ru_okvalpha.yaml
│ │ │ ├── ru_okvname.yaml
│ │ │ └── ru_sbankaccount.yaml
│ │ ├── industry
│ │ │ ├── ru_okved.yaml
│ │ │ └── ru_okpd.yaml
│ │ └── medical
│ │ │ ├── ru_medicinetradename.yaml
│ │ │ ├── ru_medicineregnum.yaml
│ │ │ └── ru_medmnn.yaml
│ ├── AT
│ │ └── geo
│ │ │ └── at_city.yaml
│ ├── FR
│ │ ├── persons
│ │ │ ├── fr_fullname.yaml
│ │ │ ├── fr_nir.yaml
│ │ │ ├── fr_passport.yaml
│ │ │ └── fr_cni.yaml
│ │ └── geo
│ │ │ ├── fr_epcicode.yaml
│ │ │ ├── fr_postcode.yaml
│ │ │ ├── fr_siretcode.yaml
│ │ │ └── fr_inseecode.yaml
│ ├── NL
│ │ └── companies
│ │ │ └── rsin.yaml
│ ├── AU
│ │ ├── persons
│ │ │ ├── au_passport.yaml
│ │ │ ├── au_tfn_number.yaml
│ │ │ └── au_medicare.yaml
│ │ ├── finances
│ │ │ └── au_bsb.yaml
│ │ └── companies
│ │ │ ├── au_acn.yaml
│ │ │ └── au_abn.yaml
│ ├── MX
│ │ └── geo
│ │ │ ├── mx_state.yaml
│ │ │ └── mx_iso3166_2.yaml
│ ├── CA
│ │ ├── geo
│ │ │ ├── ca_province copy.yaml
│ │ │ └── ca_province.yaml
│ │ ├── persons
│ │ │ ├── ca_passport.yaml
│ │ │ ├── ca_on_ohip.yaml
│ │ │ ├── ca_driver_license.yaml
│ │ │ ├── ca_sin.yaml
│ │ │ └── ca_bc_ptn.yaml
│ │ ├── companies
│ │ │ ├── cacorp.yaml
│ │ │ └── cabizlic.yaml
│ │ └── finances
│ │ │ ├── cacharity.yaml
│ │ │ ├── cabn.yaml
│ │ │ └── cagst.yaml
│ ├── GB
│ │ ├── persons
│ │ │ ├── uk_passport.yaml
│ │ │ ├── uk_utr.yaml
│ │ │ ├── uk_nino.yaml
│ │ │ ├── uk_driver_license.yaml
│ │ │ └── uk_nhs_number.yaml
│ │ ├── geo
│ │ │ ├── uk_postalcode.yaml
│ │ │ ├── uk_wardcode.yaml
│ │ │ ├── uk_wardname.yaml
│ │ │ ├── uk_toid.yaml
│ │ │ └── uk_uprn.yaml
│ │ ├── finances
│ │ │ └── uk_sedol.yaml
│ │ ├── medical
│ │ │ └── uk_bnfcode.yaml
│ │ └── companies
│ │ │ └── uk_companyhouseid.yaml
│ ├── TH
│ │ └── persons
│ │ │ └── th_idcard.yaml
│ ├── ES
│ │ ├── persons
│ │ │ ├── es_driver_license.yaml
│ │ │ ├── es_passport.yaml
│ │ │ ├── es_nif_number.yaml
│ │ │ └── es_nie_number.yaml
│ │ └── geo
│ │ │ └── es_postcode.yaml
│ ├── DE
│ │ ├── persons
│ │ │ ├── de_driver_license.yaml
│ │ │ └── de_personalausweis.yaml
│ │ ├── medical
│ │ │ └── opscode.yaml
│ │ └── finances
│ │ │ └── handelsregisternr.yaml
│ ├── SE
│ │ └── persons
│ │ │ ├── se_passport.yaml
│ │ │ └── se_personnumer.yaml
│ ├── BR
│ │ └── persons
│ │ │ └── br_cpf.yaml
│ ├── FI
│ │ └── persons
│ │ │ └── fi_natid.yaml
│ ├── BE
│ │ └── persons
│ │ │ └── be_natcardnum.yaml
│ ├── SG
│ │ └── persons
│ │ │ └── sg_nric.yaml
│ ├── EU
│ │ ├── industry
│ │ │ └── eu_cpvcode.yaml
│ │ ├── geo
│ │ │ └── eu_nuts.yaml
│ │ ├── transport
│ │ │ ├── eu_cin.yaml
│ │ │ └── eu_eninumber.yaml
│ │ └── companies
│ │ │ └── eu_vatin.yaml
│ ├── AR
│ │ └── persons
│ │ │ └── ar_dni.yaml
│ └── DK
│ │ └── persons
│ │ └── dk_cpr.yaml
├── tools
│ ├── pii
│ │ ├── piicatcher.yaml
│ │ ├── scrubadub.yaml
│ │ ├── ibm-guardium.yaml
│ │ └── metadata-guardian.yaml
│ └── other
│ │ ├── soda.yaml
│ │ ├── auctus.yaml
│ │ ├── metabase.yaml
│ │ └── googledatastudio.yaml
├── langs.yaml
└── schemes
│ └── tool.json
├── _original
├── countries.csv
├── languages.csv
└── categories.csv
├── requirements.txt
└── analysis
├── sherlock
└── sherlock_datatypes.xlsx
└── README.md
/scripts/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.idea/.name:
--------------------------------------------------------------------------------
1 | us_dea_certificate.yaml
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | _site/
2 | .sass-cache/
3 | .jekyll-cache/
4 | .jekyll-metadata
5 |
--------------------------------------------------------------------------------
/data/datahub/bg.yaml:
--------------------------------------------------------------------------------
1 | source:
2 | type: datahub-business-glossary
3 | config:
4 | file: metacrafter.yml
5 |
--------------------------------------------------------------------------------
/_original/countries.csv:
--------------------------------------------------------------------------------
1 | RU Russian Federation
2 | GB United Kingdom
3 | US United States
4 | FR France
5 | ES Spain
6 | MX Mexico
--------------------------------------------------------------------------------
/_original/languages.csv:
--------------------------------------------------------------------------------
1 | ru Russian
2 | common Common to most languages
3 | en English
4 | fr French
5 | es Spanish
6 | it Italian
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # Core dependencies for metacrafter-registry
2 | PyYAML>=6.0
3 | cerberus>=1.3.4
4 | typer>=0.9.0
5 | flask>=2.3.0
6 |
7 |
--------------------------------------------------------------------------------
/analysis/sherlock/sherlock_datatypes.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apicrafter/metacrafter-registry/HEAD/analysis/sherlock/sherlock_datatypes.xlsx
--------------------------------------------------------------------------------
/analysis/README.md:
--------------------------------------------------------------------------------
1 | # Analysis
2 |
3 | Review and analysis of existing metadata/semantic types tools.
4 |
5 | This material will be moved to another repository someday.
6 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/age.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: Age of the person or organization in years
4 | id: age
5 | is_pii: 'True'
6 | langs:
7 | - common
8 | name: Age
9 | links: []
10 | examples: []
11 | wikidata_property: ''
12 | translations: {}
13 | regexp: ''
14 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/en_month.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: English name of the month
4 | id: enmonth
5 | langs:
6 | - en
7 | name: Month name (English)
8 | semantic_type: month
9 | links: []
10 | examples: []
11 | wikidata_property: ''
12 | translations: {}
13 | regexp: ''
14 |
--------------------------------------------------------------------------------
/data/datatypes/US/geo/us_city.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - US
5 | doc: United States city name
6 | id: uscity
7 | langs:
8 | - en
9 | name: United States city
10 | semantic_type: city
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/time.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: Time of the day, like 12:00 or 10PM or 11:05 or 18:01:27
4 | id: time
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | name: Time
9 | links: []
10 | examples: []
11 | wikidata_property: ''
12 | translations: {}
13 | regexp: ''
14 |
--------------------------------------------------------------------------------
/data/datatypes/any/internet/ipsubnet.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - internet
3 | doc: Subnetwork of IPv4 or IPv6 addresses
4 | id: ipsubnet
5 | is_pii: 'True'
6 | langs:
7 | - common
8 | name: IPv4 or IPv6 subnet
9 | links: []
10 | examples: []
11 | wikidata_property: ''
12 | translations: {}
13 | regexp: ''
14 |
--------------------------------------------------------------------------------
/data/datatypes/any/values/percentage.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - values
3 | - common
4 | doc: Percentage float number
5 | id: percentage
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | name: Percentage value
10 | links: []
11 | examples: []
12 | wikidata_property: ''
13 | translations: {}
14 | regexp: ''
15 |
--------------------------------------------------------------------------------
/data/datatypes/RU/geo/ru_city.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - RU
5 | doc: City name in Russian language
6 | id: ruscity
7 | langs:
8 | - ru
9 | name: City name (Russian)
10 | semantic_type: city
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/geo/geopoint.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | doc: Basic geopoint as coordinates
4 | id: geopoint
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | name: Geopoint
9 | classification: identifier
10 | links: []
11 | examples: []
12 | wikidata_property: ''
13 | translations: {}
14 | regexp: ''
15 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/en_dayofweek.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: Day of week written in English
4 | id: endayofweek
5 | langs:
6 | - en
7 | name: Day of week (in English)
8 | semantic_type: dayofweek
9 | links: []
10 | examples: []
11 | wikidata_property: ''
12 | translations: {}
13 | regexp: ''
14 |
--------------------------------------------------------------------------------
/data/datatypes/any/persons/password.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - common
3 | - pii
4 | doc: Passwords, usually associated with login/username/email.
5 | id: password
6 | is_pii: 'True'
7 | langs:
8 | - common
9 | name: Password
10 | links: []
11 | examples: []
12 | wikidata_property: ''
13 | translations: {}
14 | regexp: ''
15 |
--------------------------------------------------------------------------------
/data/datatypes/AT/geo/at_city.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - AT
5 | doc: Austrian city name in German language
6 | id: atcity
7 | langs:
8 | - de
9 | name: Austrian city name (in German)
10 | semantic_type: city
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/RU/geo/ru_street.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - RU
5 | doc: Street name written in Russian language
6 | id: rustreet
7 | langs:
8 | - ru
9 | name: Street name (Russian)
10 | semantic_type: street
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/US/geo/us_county.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - US
5 | doc: Name or code of US County
6 | id: uscounty
7 | is_pii: 'False'
8 | langs:
9 | - en
10 | name: US County
11 | classification: categorical
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/US/geo/us_state.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - US
5 | doc: Name or code of US State
6 | id: usstate
7 | is_pii: 'False'
8 | langs:
9 | - en
10 | name: US State
11 | classification: categorical
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/any/values/income.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - values
3 | - common
4 | doc: Income, sub-type of money
5 | id: income
6 | is_pii: 'False'
7 | parent:
8 | type: money
9 | langs:
10 | - common
11 | name: Income (money)
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/any/transport/air/airlinename.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - transport
3 | doc: Unique name of the airline
4 | id: airlinename
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | name: Airline name
9 | classification: identifier
10 | links: []
11 | examples: []
12 | wikidata_property: ''
13 | translations: {}
14 | regexp: ''
15 |
--------------------------------------------------------------------------------
/data/datatypes/any/values/quantity.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - values
3 | - common
4 | doc: Number of something, quantity
5 | id: quantity
6 | is_pii: 'False'
7 | parent:
8 | type: numeric
9 | langs:
10 | - common
11 | name: Quantity
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/RU/datetime/ru_timerange.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | country:
4 | - RU
5 | doc: Time range written in Russian
6 | id: rustimerange
7 | langs:
8 | - ru
9 | name: Time range (Russian)
10 | semantic_type: timerange
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/values/price.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - values
3 | - common
4 | doc: Price of something, sub-type of money
5 | id: price
6 | is_pii: 'False'
7 | parent:
8 | type: money
9 | langs:
10 | - common
11 | name: Price (money)
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/RU/persons/ru_degree.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - persons
3 | country:
4 | - RU
5 | doc: Academic degree title in Russian.
6 | id: rusdegree
7 | langs:
8 | - ru
9 | name: Academic degree (Russian)
10 | semantic_type: academicdegree
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/RU/persons/ru_midname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - RU
6 | doc: Middle name in Russian
7 | id: rusmidname
8 | langs:
9 | - ru
10 | name: Middle name in Russian
11 | semantic_type: person_midname
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/en_monthshort.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: Abbreviated (short) English name of the month
4 | id: enmonthshort
5 | langs:
6 | - en
7 | name: Short name of month (English)
8 | semantic_type: month
9 | links: []
10 | examples: []
11 | wikidata_property: ''
12 | translations: {}
13 | regexp: ''
14 |
--------------------------------------------------------------------------------
/data/datatypes/any/persons/gender.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | doc: Gender flag and/or any other identification value
5 | id: gender
6 | is_pii: 'True'
7 | langs:
8 | - common
9 | name: Gender
10 | classification: categorical
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/values/money_amount.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - values
3 | - common
4 | doc: Amount of money as numeric value
5 | id: money
6 | is_pii: 'False'
7 | parent:
8 | type: numeric
9 | langs:
10 | - common
11 | name: Money amount
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/RU/persons/ru_firstname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - RU
6 | doc: First name in Russian
7 | id: rusfirstname
8 | langs:
9 | - ru
10 | name: First name in Russian
11 | semantic_type: person_firstname
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/any/persons/person_midname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - persons
3 | - pii
4 | doc: Middle name of the person
5 | id: person_midname
6 | is_pii: 'True'
7 | langs:
8 | - common
9 | name: Person middle name
10 | patterns:
11 | - rusmidname
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/any/pii/nhsnumber.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | id: nhsnumber
4 | is_pii: 'True'
5 | langs:
6 | - en
7 | name: NHS number
8 | doc: Datatype for 'NHS number' (nhsnumber) from rules in context 'pii'.
9 | classification: identifier
10 | links: []
11 | examples: []
12 | wikidata_property: ''
13 | translations: {}
14 | regexp: ''
15 |
--------------------------------------------------------------------------------
/data/datatypes/any/values/cost.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - values
3 | - common
4 | doc: Cost of something as sub-type of money
5 | id: cost
6 | is_pii: 'False'
7 | parent:
8 | type: money
9 | langs:
10 | - common
11 | name: Cost of something (money)
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/any/values/discount.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - values
3 | - common
4 | doc: Discount from price, sub-type of money
5 | id: discount
6 | is_pii: 'False'
7 | parent:
8 | type: money
9 | langs:
10 | - common
11 | name: Discount (money)
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/any/values/score.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - values
3 | - common
4 | doc: Score of something as numeric/float number
5 | id: score
6 | is_pii: 'False'
7 | parent:
8 | type: numeric
9 | langs:
10 | - common
11 | name: Score (numeric)
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/RU/datetime/ru_dayofweek.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | country:
4 | - RU
5 | doc: Day of week written in Russian language
6 | id: rusdayofweek
7 | langs:
8 | - ru
9 | name: Day of week (in Russian)
10 | semantic_type: dayofweek
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/RU/geo/ru_countryname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - RU
5 | doc: Name of the country written in Russian language.
6 | id: ruscountryname
7 | langs:
8 | - ru
9 | name: Country name (Russian)
10 | semantic_type: country
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/RU/geo/ru_postalcode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - RU
5 | doc: 6-digit postal codes used in the Russian Federation
6 | id: ruspostalcode
7 | langs:
8 | - common
9 | name: Russian postal code
10 | semantic_type: postindex
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/RU/persons/ru_surname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - RU
6 | doc: Surname/Lastname in Russian
7 | id: russurname
8 | langs:
9 | - ru
10 | name: Surname/Lastname in Russian
11 | semantic_type: person_surname
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/any/transport/air/airlinecallsign.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - transport
3 | doc: Unique call sign of the airline
4 | id: airlinecallsign
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | name: Airline call sign
9 | classification: identifier
10 | links: []
11 | examples: []
12 | wikidata_property: ''
13 | translations: {}
14 | regexp: ''
15 |
--------------------------------------------------------------------------------
/data/datatypes/any/values/grossmargin.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - values
3 | - common
4 | doc: Gross margin, sub-type of money
5 | id: grossmargin
6 | is_pii: 'False'
7 | parent:
8 | type: money
9 | langs:
10 | - common
11 | name: Gross margin (money)
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/any/finances/bic.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | id: bic
4 | is_pii: 'False'
5 | langs:
6 | - common
7 | name: SWIFT/BIC code
8 | doc: Datatype for 'SWIFT/BIC code' (bic) from rules in context 'finances'.
9 | classification: identifier
10 | links: []
11 | examples: []
12 | wikidata_property: ''
13 | translations: {}
14 | regexp: ''
15 |
--------------------------------------------------------------------------------
/data/datatypes/any/medical/npi.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - medical
3 | id: npi
4 | is_pii: 'True'
5 | langs:
6 | - en
7 | name: US NPI identifier
8 | doc: Datatype for 'US NPI identifier' (npi) from rules in context 'medical'.
9 | classification: identifier
10 | links: []
11 | examples: []
12 | wikidata_property: ''
13 | translations: {}
14 | regexp: ''
15 |
--------------------------------------------------------------------------------
/data/datatypes/any/values/numeric.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - values
3 | - common
4 | doc: Any integer or float number
5 | id: numeric
6 | is_pii: 'False'
7 | links:
8 | - type: schema.org
9 | url: https://schema.org/Number
10 | langs:
11 | - common
12 | name: Numeric value
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/RU/companies/ru_companyname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - companies
3 | country:
4 | - RU
5 | doc: Russian written company/business name
6 | id: rucompanyname
7 | langs:
8 | - ru
9 | name: Company/business name in Russian
10 | semantic_type: orgname
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/month.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: Month as number since start of the year. Could be from 1 to 12
4 | id: month
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | name: Month
9 | patterns:
10 | - enmonth
11 | - enmonthshort
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/any/education/ukurn.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - education
3 | id: ukurn
4 | is_pii: 'False'
5 | langs:
6 | - en
7 | name: UK school URN
8 | doc: Datatype for 'UK school URN' (ukurn) from rules in context 'education'.
9 | classification: identifier
10 | links: []
11 | examples: []
12 | wikidata_property: ''
13 | translations: {}
14 | regexp: ''
15 |
--------------------------------------------------------------------------------
/data/datatypes/any/finances/clabe.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | id: clabe
4 | is_pii: 'False'
5 | langs:
6 | - common
7 | name: Mexican CLABE
8 | doc: Datatype for 'Mexican CLABE' (clabe) from rules in context 'finances'.
9 | classification: identifier
10 | links: []
11 | examples: []
12 | wikidata_property: ''
13 | translations: {}
14 | regexp: ''
15 |
--------------------------------------------------------------------------------
/data/datatypes/any/finances/ifsc.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | id: ifsc
4 | is_pii: 'False'
5 | langs:
6 | - common
7 | name: Indian IFSC code
8 | doc: Datatype for 'Indian IFSC code' (ifsc) from rules in context 'finances'.
9 | classification: identifier
10 | links: []
11 | examples: []
12 | wikidata_property: ''
13 | translations: {}
14 | regexp: ''
15 |
--------------------------------------------------------------------------------
/data/datatypes/any/geo/street.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | - pii
4 | - common
5 | doc: Name of the street, commonly used in address
6 | id: street
7 | is_pii: 'True'
8 | langs:
9 | - common
10 | name: Street
11 | patterns:
12 | - rustreet
13 | - russtreet
14 | links: []
15 | examples: []
16 | wikidata_property: ''
17 | translations: {}
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/datatypes/any/identifiers/id.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - identifiers
3 | doc: A unique identifier, local to a DB/dataset or global and persistent
4 | id: id
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | name: Unique identifier
9 | classification: identifier
10 | links: []
11 | examples: []
12 | wikidata_property: ''
13 | translations: {}
14 | regexp: ''
15 |
--------------------------------------------------------------------------------
/data/datatypes/any/transport/vehicles/vehiclenumber.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - transport
3 | doc: Number of the vehicle (vehicle plate number)
4 | id: vehiclenumber
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | name: Vehicle number
9 | classification: identifier
10 | links: []
11 | examples: []
12 | wikidata_property: ''
13 | translations: {}
14 | regexp: ''
15 |
--------------------------------------------------------------------------------
/data/datatypes/RU/persons/ru_fullname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - RU
6 | doc: Fullname of the person in Russian
7 | id: rusfullname
8 | langs:
9 | - ru
10 | name: Fullname of the person in Russian
11 | semantic_type: person_fullname
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/timerange.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: Interval between two points of time, like 8:00-21:00 or 8:00AM-1:00PM.
4 | id: timerange
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | name: Time range
9 | patterns:
10 | - rustimerange
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/medical/upin.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - medical
3 | id: upin
4 | is_pii: 'True'
5 | langs:
6 | - en
7 | name: US UPIN identifier
8 | doc: Datatype for 'US UPIN identifier' (upin) from rules in context
9 | 'medical'.
10 | classification: identifier
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/pii/ssn.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | id: ssn
4 | is_pii: 'True'
5 | langs:
6 | - en
7 | name: US Social Security Number
8 | doc: Datatype for 'US Social Security Number' (ssn) from rules in context
9 | 'pii'.
10 | classification: identifier
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/FR/persons/fr_fullname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - FR
6 | doc: Full name of the person written in French
7 | id: frfullname
8 | langs:
9 | - fr
10 | name: French full name of the person
11 | semantic_type: person_fullname
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/RU/geo/ru_regioncode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - RU
5 | doc: Russian unique code of country subdivision (regions/federal subjects)
6 | id: rusregioncode
7 | langs:
8 | - ru
9 | name: Russian region unique code
10 | semantic_type: rusregion
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/education/ukprn.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - education
3 | id: ukprn
4 | is_pii: 'False'
5 | langs:
6 | - en
7 | name: UK provider UKPRN
8 | doc: Datatype for 'UK provider UKPRN' (ukprn) from rules in context
9 | 'education'.
10 | classification: identifier
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/finances/aba.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | id: aba
4 | is_pii: 'False'
5 | langs:
6 | - common
7 | name: US ABA routing number
8 | doc: Datatype for 'US ABA routing number' (aba) from rules in context
9 | 'finances'.
10 | classification: identifier
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/finances/uksortcode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | id: uksortcode
4 | is_pii: 'False'
5 | langs:
6 | - common
7 | name: UK sort code
8 | doc: Datatype for 'UK sort code' (uksortcode) from rules in context
9 | 'finances'.
10 | classification: identifier
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/NL/companies/rsin.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - companies
3 | id: rsin
4 | is_pii: 'False'
5 | langs:
6 | - nl
7 | name: RSIN op veldnaam
8 | doc: NL datatype for 'RSIN op veldnaam' (rsin) from rules in context 'companies'.
9 | classification: identifier
10 | country:
11 | - NL
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/any/common/category.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - common
3 | doc: Object or entity category
4 | id: category
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | name: Category
9 | translations:
10 | ru:
11 | doc: Категория объекта/сущности
12 | name: Категория
13 | classification: categorical
14 | links: []
15 | examples: []
16 | wikidata_property: ''
17 | regexp: ''
18 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/dayofmonth.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: Month day, numeric value from 1 to 31
4 | id: dayofmonth
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | name: Day of month
9 | translations:
10 | ru:
11 | doc: День месяца в значении от 1 до 31
12 | name: День месяца
13 | links: []
14 | examples: []
15 | wikidata_property: ''
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/any/education/studentid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - education
3 | id: studentid
4 | is_pii: 'True'
5 | langs:
6 | - en
7 | name: Student identifier
8 | doc: Datatype for 'Student identifier' (studentid) from rules in context
9 | 'education'.
10 | classification: identifier
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/medical/cptcode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - medical
3 | id: cptcode
4 | is_pii: 'False'
5 | langs:
6 | - en
7 | name: CPT/HCPCS procedure code
8 | doc: Datatype for 'CPT/HCPCS procedure code' (cptcode) from rules in
9 | context 'medical'.
10 | classification: identifier
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/pii/ninumber.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | id: ninumber
4 | is_pii: 'True'
5 | langs:
6 | - en
7 | name: UK National Insurance Number
8 | doc: Datatype for 'UK National Insurance Number' (ninumber) from rules
9 | in context 'pii'.
10 | classification: identifier
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/RU/government/ru_rnfi.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | country:
4 | - RU
5 | doc: Russian government federal property unique ID (RNFI)
6 | id: rnfi
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Russian government property ID (RNFI)
11 | classification: identifier
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/any/education/ncesid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - education
3 | id: ncesid
4 | is_pii: 'False'
5 | langs:
6 | - en
7 | name: US NCES school identifier
8 | doc: Datatype for 'US NCES school identifier' (ncesid) from rules in
9 | context 'education'.
10 | classification: identifier
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/internet/ipaddr.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - internet
3 | - pii
4 | doc: Internet Protocol version 4 (IPv4) or Internet Protocol version 6 (IPv6) address
5 | id: ipaddr
6 | is_pii: 'True'
7 | langs:
8 | - common
9 | name: IPv4 or IPv6 address
10 | classification: identifier
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/medical/snomedcode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - medical
3 | id: snomedcode
4 | is_pii: 'False'
5 | langs:
6 | - en
7 | name: SNOMED CT concept code
8 | doc: Datatype for 'SNOMED CT concept code' (snomedcode) from rules in
9 | context 'medical'.
10 | classification: identifier
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/values/share.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - values
3 | - common
4 | doc: Share of a part expressed as a percentage (float number). Always between 0 and 100 percent.
5 | id: share
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | name: Share of value
10 | parent:
11 | type: percentage
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/RU/geo/ru_regionname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - RU
5 | doc: Name of the region (federal subject) in Russia as given in the country's Constitution
6 | id: rusregionname
7 | langs:
8 | - ru
9 | name: Russian region (federal subject) name
10 | semantic_type: rusregion
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/environment/gridnode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - environment
3 | id: gridnode
4 | is_pii: 'False'
5 | langs:
6 | - en
7 | name: Grid node identifier
8 | doc: Datatype for 'Grid node identifier' (gridnode) from rules in context
9 | 'environment'.
10 | classification: identifier
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/medical/gmcnumber.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - medical
3 | id: gmcnumber
4 | is_pii: 'True'
5 | langs:
6 | - en
7 | name: UK GMC registration number
8 | doc: Datatype for 'UK GMC registration number' (gmcnumber) from rules
9 | in context 'medical'.
10 | classification: identifier
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/persons/username.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | doc: Name of the user or login, commonly used for authentication or user identification
5 | purposes
6 | id: username
7 | is_pii: 'True'
8 | langs:
9 | - common
10 | name: Username
11 | classification: identifier
12 | links: []
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/RU/persons/ru_workposition.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - persons
3 | country:
4 | - RU
5 | doc: Work positions / job title common in Russia and written in Russian language.
6 | id: rusworkposition
7 | langs:
8 | - ru
9 | name: Work position / Job title (Russian)
10 | semantic_type: jobtitle
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/cryptocurrency/p2wshaddr.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - cryptocurrency
3 | id: p2wshaddr
4 | is_pii: 'False'
5 | langs:
6 | - common
7 | links:
8 | - type: other
9 | url: https://allprivatekeys.com/bitcoin-address-format
10 | name: P2WSH address
11 | doc: P2WSH address
12 | classification: identifier
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/any/cryptocurrency/p2wpkhaddr.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - cryptocurrency
3 | id: p2wpkhaddr
4 | is_pii: 'False'
5 | langs:
6 | - common
7 | links:
8 | - type: other
9 | url: https://allprivatekeys.com/bitcoin-address-format
10 | name: P2WPKH address
11 | doc: P2WPKH address
12 | classification: identifier
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/any/cryptography/sha1hash.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - cryptography
3 | doc: SHA-1 hash from file or data. Sometimes used as unique id of the data record
4 | id: sha1hash
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: wikipedia
10 | url: https://en.wikipedia.org/wiki/SHA-1
11 | name: SHA-1 hash
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/environment/powerplantid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - environment
3 | id: powerplantid
4 | is_pii: 'False'
5 | langs:
6 | - en
7 | name: Power plant identifier
8 | doc: Datatype for 'Power plant identifier' (powerplantid) from rules
9 | in context 'environment'.
10 | classification: identifier
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/finances/catransit.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | id: catransit
4 | is_pii: 'False'
5 | langs:
6 | - common
7 | name: Canadian branch transit number
8 | doc: Datatype for 'Canadian branch transit number' (catransit) from
9 | rules in context 'finances'.
10 | classification: identifier
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/monthday.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: Defines a part of a date - the month and day. Example (MM-DD).
4 | id: monthday
5 | is_pii: 'False'
6 | links:
7 | - type: other
8 | url: https://ddialliance.org/Specification/DDI-CV/DateType_1.1.html
9 | langs:
10 | - common
11 | name: Month and day
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/environment/waterstation.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - environment
3 | id: waterstation
4 | is_pii: 'False'
5 | langs:
6 | - en
7 | name: Water quality station id
8 | doc: Datatype for 'Water quality station id' (waterstation) from rules
9 | in context 'environment'.
10 | classification: identifier
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/texts/headline.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - common
3 | - texts
4 | doc: Headline of the article.
5 | id: headline
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: schema.org
11 | url: https://schema.org/headline
12 | name: Headline
13 | translations:
14 | ru:
15 | doc: Заголовок статьи
16 | name: Заголовок
17 | examples: []
18 | wikidata_property: ''
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/cryptography/sha256hash.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - cryptography
3 | doc: SHA256 hash from file or data. Sometimes used as unique id of the data record
4 | id: sha256hash
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: wikipedia
10 | url: https://en.wikipedia.org/wiki/SHA-2
11 | name: SHA256 hash
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/yearmonth.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: Defines a part of a date - the year and month. Example YYYY-MM.
4 | id: yearmonth
5 | is_pii: 'False'
6 | links:
7 | - type: other
8 | url: https://ddialliance.org/Specification/DDI-CV/DateType_1.1.html
9 | langs:
10 | - common
11 | name: Year and month
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/persons/person_fullname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - persons
3 | - pii
4 | doc: Full name of the person as combination of last, first, and sometimes middle name
5 | id: person_fullname
6 | is_pii: 'True'
7 | langs:
8 | - common
9 | name: Person fullname
10 | patterns:
11 | - rusfullname
12 | - frfullname
13 | links: []
14 | examples: []
15 | wikidata_property: ''
16 | translations: {}
17 | regexp: ''
18 |
--------------------------------------------------------------------------------
/data/datatypes/any/shipping/tracknum.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - objectids
3 | - shipping
4 | doc: Postal tracking number
5 | id: tracknum
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: wikipedia
11 | url: https://en.wikipedia.org/wiki/Tracking_number
12 | name: Tracking number (postal)
13 | classification: identifier
14 | examples: []
15 | wikidata_property: ''
16 | translations: {}
17 | regexp: ''
18 |
--------------------------------------------------------------------------------
/data/datatypes/AU/persons/au_passport.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - AU
6 | doc: Number of the Australian national passport
7 | id: aupassport
8 | langs:
9 | - en
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/Australian_passport
13 | name: Australian passport number
14 | semantic_type: passport
15 | examples: []
16 | wikidata_property: ''
17 | translations: {}
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/datatypes/RU/persons/ru_passport.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - RU
6 | doc: Number of Russian foreign passport
7 | id: rupassport
8 | langs:
9 | - ru
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/Russian_passport
13 | name: Russian foreign passport number
14 | semantic_type: passport
15 | examples: []
16 | wikidata_property: ''
17 | translations: {}
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/datatypes/MX/geo/mx_state.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - MX
5 | doc: Mexican state/province by name or code
6 | id: mxstate
7 | is_pii: 'False'
8 | langs:
9 | - es
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/List_of_states_of_Mexico
13 | name: Mexican state (province)
14 | classification: categorical
15 | examples: []
16 | wikidata_property: ''
17 | translations: {}
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/datatypes/RU/geo/ru_mosdistricts.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - RU
5 | doc: Moscow city (Russia) districts
6 | id: rumosdistricts
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Moscow districts (Russia)
11 | translations:
12 | ru:
13 | doc: Округа города Москвы
14 | name: Округа города Москвы
15 | classification: categorical
16 | links: []
17 | examples: []
18 | wikidata_property: ''
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/FR/persons/fr_nir.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - FR
6 | doc: France's National ID number
7 | id: frnir
8 | is_pii: 'True'
9 | langs:
10 | - fr
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/INSEE_code
14 | name: France National ID number (social number)
15 | classification: identifier
16 | examples: []
17 | wikidata_property: ''
18 | translations: {}
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/US/persons/us_passport.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - US
6 | doc: Number of US Passport
7 | id: uspassport
8 | is_pii: 'True'
9 | langs:
10 | - en
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/United_States_passport
14 | name: US passport number
15 | semantic_type: passport
16 | regexp: '[0-9]{9}'
17 | examples: []
18 | wikidata_property: ''
19 | translations: {}
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/environment/emissioninventory.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - environment
3 | id: emissioninventory
4 | is_pii: 'False'
5 | langs:
6 | - en
7 | name: Emission inventory field by name
8 | doc: Datatype for 'Emission inventory field by name' (emissioninventory) from rules
9 | in context 'environment'.
10 | classification: identifier
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/any/geo/en_countryname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | doc: Name of the country in English
4 | id: encountryname
5 | langs:
6 | - common
7 | name: Country name (English)
8 | semantic_type: country
9 | examples:
10 | - value: Afghanistan
11 | description: Afghanistan (country name)
12 | - value: Mexico
13 | description: Mexico (country name)
14 | links: []
15 | wikidata_property: ''
16 | translations: {}
17 | regexp: ''
18 |
--------------------------------------------------------------------------------
/data/datatypes/any/persons/person_surname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - persons
3 | - pii
4 | doc: Last name of the person. For example, Smith or Ivanova
5 | id: person_surname
6 | is_pii: 'True'
7 | langs:
8 | - common
9 | links:
10 | - type: schema.org
11 | url: https://schema.org/familyName
12 | name: Person surname/lastname
13 | patterns:
14 | - russurname
15 | examples: []
16 | wikidata_property: ''
17 | translations: {}
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/datatypes/any/cryptography/md5hash.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - cryptography
3 | doc: Result of the MD5 hash function. Commonly used to generate a hash of a file
4 | or data or to use as unique identifier
5 | id: md5hash
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: wikipedia
11 | url: https://en.wikipedia.org/wiki/MD5
12 | name: MD5 hash
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/any/persons/person_firstname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - persons
3 | - pii
4 | doc: First name of the person. For example, Donald or Vladimir or Jack
5 | id: person_firstname
6 | is_pii: 'True'
7 | langs:
8 | - common
9 | links:
10 | - type: schema.org
11 | url: https://schema.org/givenName
12 | name: Person firstname
13 | patterns:
14 | - rusfirstname
15 | examples: []
16 | wikidata_property: ''
17 | translations: {}
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/datatypes/CA/geo/ca_province copy.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - CA
5 | doc: Provinces and territories of Canada
6 | id: caprovince
7 | is_pii: 'False'
8 | links:
9 | - type: wikipedia
10 | url: https://en.wikipedia.org/wiki/Provinces_and_territories_of_Canada
11 | langs:
12 | - en
13 | name: Province of Canada
14 | classification: categorical
15 | examples: []
16 | wikidata_property: ''
17 | translations: {}
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/datatypes/RU/persons/ru_intpassport.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - RU
6 | doc: Number of Russian internal passport
7 | id: ruintpassport
8 | langs:
9 | - ru
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/Internal_passport_of_Russia
13 | name: Russian internal passport number
14 | semantic_type: passport
15 | examples: []
16 | wikidata_property: ''
17 | translations: {}
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/datatypes/any/persons/nationality.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | doc: Nationality of the person, given as a unique code from one of the code lists or
5 | defined in some other way.
6 | id: nationality
7 | is_pii: 'True'
8 | langs:
9 | - common
10 | name: Person nationality
11 | classification: categorical
12 | patterns:
13 | - ennationality
14 | links: []
15 | examples: []
16 | wikidata_property: ''
17 | translations: {}
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/datatypes/any/texts/description.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - common
3 | doc: Item, Object or entity description, common for any object
4 | id: description
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | name: Description
9 | links:
10 | - type: schema.org
11 | url: https://schema.org/description
12 | translations:
13 | ru:
14 | doc: Описание объекта или сущности
15 | name: Описание
16 | examples: []
17 | wikidata_property: ''
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/datatypes/any/science/scopusauthoirid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - persons
3 | - science
4 | doc: identifier for an author assigned in Scopus bibliographic database
5 | id: scopusauthoirid
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: wikidata
11 | url: https://www.wikidata.org/wiki/Property:P1153
12 | name: Scopus author ID
13 | classification: identifier
14 | examples: []
15 | wikidata_property: ''
16 | translations: {}
17 | regexp: ''
18 |
--------------------------------------------------------------------------------
/data/datatypes/any/texts/comment.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - common
3 | - texts
4 | doc: Comment/note for the object or entity.
5 | id: comment
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: schema.org
11 | url: https://schema.org/Comment
12 | name: Comment / Note
13 | translations:
14 | ru:
15 | doc: Комментарий или примечание к объекту
16 | name: Комментарий / примечание
17 | examples: []
18 | wikidata_property: ''
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/GB/persons/uk_passport.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - GB
6 | doc: Number of the passport issued by the United Kingdom
7 | id: ukpassport
8 | is_pii: 'True'
9 | langs:
10 | - en
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/British_passport
14 | name: British passport number
15 | semantic_type: passport
16 | regexp: '[0-9]{9}'
17 | examples: []
18 | wikidata_property: ''
19 | translations: {}
20 |
--------------------------------------------------------------------------------
/data/datatypes/RU/geo/ru_mosadmareas.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - RU
5 | doc: Moscow city (Russia) administrative areas
6 | id: rumosadmareas
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Moscow administrative areas (Russia)
11 | translations:
12 | ru:
13 | doc: Справочник районов города Москвы
14 | name: Районы города Москвы
15 | classification: categorical
16 | links: []
17 | examples: []
18 | wikidata_property: ''
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/TH/persons/th_idcard.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - TH
6 | doc: Number of the Thai identity card
7 | id: thidcard
8 | is_pii: 'True'
9 | langs:
10 | - th
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/Thai_identity_card
14 | regexp: ^\b\d{1}-\d{4}-\d{5}-\d{2}-\d\b$
15 | name: Thai identity card
16 | classification: identifier
17 | examples: []
18 | wikidata_property: ''
19 | translations: {}
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/year.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: A calendar year in western notation like 2022 or 1913
4 | id: year
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: wikipedia
10 | url: https://en.wikipedia.org/wiki/Year
11 | name: Year
12 | regexp: \d{4}
13 | examples:
14 | - value: '2022'
15 | description: Year 2022
16 | - value: '1243'
17 | description: Year 1243
18 | wikidata_property: ''
19 | translations: {}
20 |
--------------------------------------------------------------------------------
/data/datatypes/CA/persons/ca_passport.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - CA
6 | doc: Number of Canadian passport
7 | id: capassport
8 | is_pii: 'True'
9 | langs:
10 | - en
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/Canadian_passport
14 | name: Canadian passport number
15 | semantic_type: passport
16 | classification: identifier
17 | examples: []
18 | wikidata_property: ''
19 | translations: {}
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/identifiers/guid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - identifiers
3 | doc: A universally unique identifier (UUID) is a 128-bit label used for information
4 | in computer systems. The term globally unique identifier (GUID) is also used.
5 | id: guid
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | name: GUID (Globally unique identifier)
10 | classification: identifier
11 | links: []
12 | examples: []
13 | wikidata_property: ''
14 | translations: {}
15 | regexp: ''
16 |
--------------------------------------------------------------------------------
/data/datatypes/ES/persons/es_driver_license.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - ES
6 | doc: Alphanumeric value of the Spain driver license
7 | id: esdriverlic
8 | is_pii: 'True'
9 | langs:
10 | - es
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/Driving_licence_in_Spain
14 | name: Spain driver license number
15 | classification: identifier
16 | examples: []
17 | wikidata_property: ''
18 | translations: {}
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/dayofweek.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: Week day name
4 | id: dayofweek
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: wikipedia
10 | url: https://en.wikipedia.org/wiki/Workweek_and_weekend
11 | name: Day of week
12 | patterns:
13 | - endayofweek
14 | - rusdayofweek
15 | translations:
16 | ru:
17 | doc: Название дня недели
18 | name: День недели
19 | examples: []
20 | wikidata_property: ''
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/deathday.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - datetime
4 | doc: Date of death of the person
5 | id: deathday
6 | is_pii: 'True'
7 | langs:
8 | - common
9 | links:
10 | - type: wikipedia
11 | url: https://en.wikipedia.org/wiki/Death_anniversary
12 | name: Date of death
13 | parent:
14 | type: date
15 | translations:
16 | ru:
17 | doc: Дата смерти человека
18 | name: Дата смерти
19 | examples: []
20 | wikidata_property: ''
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/any/dublincore/dclanguage.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - dublincore
3 | doc: A language of the resource.
4 | id: dclanguage
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: dublincore
10 | url: http://purl.org/dc/terms/language
11 | name: Language (Dublin Core)
12 | examples:
13 | - value: eng
14 | description: English language
15 | translations:
16 | ru:
17 | doc: Язык ресурса
18 | name: Язык
19 | wikidata_property: ''
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/geo/iso3166code.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | doc: identifier for a country subdivision per ISO 3166-2 (include country code)
4 | id: iso3166code
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: wikidata
10 | url: https://www.wikidata.org/wiki/Property:P300
11 | name: identifier for a country subdivision per ISO 3166-2
12 | classification: categorical
13 | examples: []
14 | wikidata_property: ''
15 | translations: {}
16 | regexp: ''
17 |
--------------------------------------------------------------------------------
/data/datatypes/DE/persons/de_driver_license.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - DE
6 | doc: Alphanumeric value of the German driver license
7 | id: dedriverlic
8 | is_pii: 'True'
9 | langs:
10 | - de
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/Driving_licence_in_Germany
14 | name: Germany driver license number
15 | classification: identifier
16 | examples: []
17 | wikidata_property: ''
18 | translations: {}
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/RU/finances/ru_okvalpha.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | country:
4 | - RU
5 | doc: Russian currency codes (OKV classifier)
6 | id: okvalpha
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Russian currency code
11 | translations:
12 | ru:
13 | doc: Код валюты по справочнику ОКВ (Общероссийский классификатор валют)
14 | name: Код валюты по ОКВ
15 | semantic_type: currency
16 | links: []
17 | examples: []
18 | wikidata_property: ''
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/SE/persons/se_passport.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - SE
6 | doc: Swedish passport number, 8 digits
7 | id: sepassport
8 | is_pii: 'True'
9 | langs:
10 | - se
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/Swedish_passport
14 | regexp: ^[0-9]{8}$
15 | name: Sweden passport number
16 | semantic_type: passport
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/finances/iso4217curname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | doc: ISO 4217 Currency name
4 | id: iso4217curname
5 | langs:
6 | - common
7 | links:
8 | - type: other
9 | url: https://www.iso.org/iso-4217-currency-codes.html
10 | name: ISO 4217 Currency name
11 | semantic_type: currency
12 | examples:
13 | - value: palladium
14 | description: palladium
15 | - value: euro
16 | description: euro
17 | wikidata_property: ''
18 | translations: {}
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/ES/persons/es_passport.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - ES
6 | doc: Number of Spanish passport
7 | id: espassport
8 | is_pii: 'True'
9 | langs:
10 | - es
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/Spanish_passport
14 | regexp: ^[A-z0-9]{2,3}[0-9]{6}$
15 | name: Spanish passport number
16 | semantic_type: passport
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/FR/persons/fr_passport.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - FR
6 | doc: Number of French passport
7 | id: frpassport
8 | is_pii: 'True'
9 | langs:
10 | - fr
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/French_passport
14 | regexp: ^[0-9]{2}[A-z]{2}[0-9]{5}$
15 | name: French passport number
16 | semantic_type: passport
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/RU/finances/ru_okvname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | country:
4 | - RU
5 | doc: Russian currency names (OKV classifier)
6 | id: okvname
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Russian currency name
11 | translations:
12 | ru:
13 | doc: Название валюты по справочнику ОКВ (Общероссийский классификатор валют)
14 | name: Название валюты по ОКВ
15 | semantic_type: currency
16 | links: []
17 | examples: []
18 | wikidata_property: ''
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/common/name.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - common
3 | doc: Name of the item, object or entity.
4 | id: name
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | name: Name
9 | links:
10 | - type: schema.org
11 | url: https://schema.org/name
12 | - type: wikidata
13 | url: https://www.wikidata.org/wiki/Property:P2561
14 | wikidata_property: P2561
15 | translations:
16 | ru:
17 | doc: Название объекта или сущности
18 | name: Название
19 | examples: []
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/quarter.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: A calendar quarter of the year
4 | id: quarter
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: wikipedia
10 | url: https://en.wikipedia.org/wiki/Calendar_year#Quarters
11 | name: Quarter of the year
12 | regexp: '[1234]'
13 | examples:
14 | - value: '1'
15 | description: First quarter
16 | - value: '2'
17 | description: Second quarter
18 | wikidata_property: ''
19 | translations: {}
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/software/programminglang.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - software
3 | doc: Programming language
4 | id: programminglang
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: wikidata
10 | url: https://www.wikidata.org/wiki/Property:P277
11 | - type: schema.org
12 | url: https://schema.org/programmingLanguage
13 | name: Programming language
14 | wikidata_property: P277
15 | examples:
16 | - value: C
17 | description: Linux
18 | translations: {}
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/transport/air/airlinecode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - transport
3 | doc: Unique code of the airline provided by IATA or ICAO
4 | id: airlinecode
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: wikipedia
10 | url: https://en.wikipedia.org/wiki/Airline_codes
11 | name: Airline code
12 | patterns:
13 | - icaoairlinecode
14 | - iataairlinecode
15 | classification: identifier
16 | examples: []
17 | wikidata_property: ''
18 | translations: {}
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/RU/finances/ru_sbankaccount.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | country:
4 | - RU
5 | doc: A bank account is a financial account maintained by a bank or other financial
6 | institution in which the financial transactions between the bank and a customer
7 | are recorded.
8 | id: rusbankaccount
9 | langs:
10 | - ru
11 | name: Bank account in Russia
12 | semantic_type: bankaccount
13 | links: []
14 | examples: []
15 | wikidata_property: ''
16 | translations: {}
17 | regexp: ''
18 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/date.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: Date with one of known notations
4 | id: date
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: wikipedia
10 | url: https://en.wikipedia.org/wiki/Date_and_time_notation
11 | - type: schema.org
12 | url: https://schema.org/Date
13 | name: Date
14 | translations:
15 | ru:
16 | doc: Дата в одной из общепринятых нотаций
17 | name: Дата
18 | examples: []
19 | wikidata_property: ''
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/timezone.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: A time zone is an area that observes a uniform standard time for legal, commercial
4 | and social purposes
5 | id: timezone
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: wikipedia
11 | url: https://en.wikipedia.org/wiki/Time_zone
12 | name: Time zone
13 | patterns:
14 | - ianatimezoneid
15 | - utctimezoneoffset
16 | examples: []
17 | wikidata_property: ''
18 | translations: {}
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/dublincore/dctitle.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - dublincore
3 | doc: A name given to the resource.
4 | id: dctitle
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: dublincore
10 | url: http://purl.org/dc/terms/title
11 | name: Title (Dublin Core)
12 | examples:
13 | - value: Picture of the cat
14 | description: Title of the resource
15 | translations:
16 | ru:
17 | doc: Имя данное ресурсу
18 | name: Заголовок
19 | wikidata_property: ''
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/CA/persons/ca_on_ohip.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - CA
6 | doc: Canada Ontario Health Insurance Plan (OHIP) number
7 | id: caonohip
8 | is_pii: 'True'
9 | langs:
10 | - en
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/Ontario_Health_Insurance_Plan
14 | name: Canada Ontario Health Insurance Plan (OHIP) number
15 | classification: identifier
16 | examples: []
17 | wikidata_property: ''
18 | translations: {}
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/RU/industry/ru_okved.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - common
3 | country:
4 | - RU
5 | doc: Russian economic classification code (OKVED)
6 | id: okved
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Russian economic classification code (OKVED)
11 | translations:
12 | ru:
13 | doc: Код по общероссийскому классификатору видов экономической деятельности
14 | name: Код ОКВЭД
15 | classification: categorical
16 | links: []
17 | examples: []
18 | wikidata_property: ''
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/software/filesize.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - software
3 | doc: size of a file
4 | id: filesize
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: wikidata
10 | url: https://www.wikidata.org/wiki/Property:P3575
11 | - type: schema.org
12 | url: https://schema.org/fileSize
13 | name: File size
14 | parent:
15 | type: datasize
16 | wikidata_property: P3575
17 | examples:
18 | - value: 839 megabyte
19 | description: FreeCAD
20 | translations: {}
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/any/useraccounts/twitter_username.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | - useraccounts
5 | doc: Person username in Twitter
6 | id: twitter_username
7 | is_pii: 'True'
8 | parent:
9 | type: username
10 | langs:
11 | - common
12 | wikidata_property: P2002
13 | links:
14 | - type: wikidata
15 | url: https://www.wikidata.org/wiki/Property:P2002
16 | name: Twitter username
17 | regexp: '[0-9A-Za-z_]{1,20}'
18 | classification: identifier
19 | examples: []
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/CA/geo/ca_province.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - CA
5 | doc: Provinces and territories of Canada alpha2 code
6 | id: caprovincecode
7 | is_pii: 'False'
8 | links:
9 | - type: wikipedia
10 | url: https://en.wikipedia.org/wiki/Provinces_and_territories_of_Canada
11 | langs:
12 | - en
13 | regexp: (NL|PE|NS|NB|QC|ON|MB|SK|AB|BC|YT|NT|NU)
14 | name: Province of Canada alpha2 code
15 | semantic_type: caprovince
16 | examples: []
17 | wikidata_property: ''
18 | translations: {}
19 |
--------------------------------------------------------------------------------
/data/datatypes/CA/persons/ca_driver_license.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - CA
6 | doc: Alphanumeric value of the Canada driver license
7 | id: cadriverlic
8 | is_pii: 'True'
9 | langs:
10 | - en
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/Driver%27s_licences_in_Canada
14 | regexp: ^[A-Z](?:\d[- ]*){14}$
15 | name: Canada driver license number
16 | classification: identifier
17 | examples: []
18 | wikidata_property: ''
19 | translations: {}
20 |
--------------------------------------------------------------------------------
/data/datatypes/MX/geo/mx_iso3166_2.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - MX
5 | doc: ISO 3166-2:MX 3-letter alphanumeric codes used to identify Mexican states.
6 | id: iso3166_mx3a
7 | is_pii: 'False'
8 | langs:
9 | - es
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/Template:Mexico_State-Abbreviation_Codes
13 | name: ISO 3166-2:MX (3-letters code)
14 | classification: categorical
15 | examples: []
16 | wikidata_property: ''
17 | translations: {}
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/datatypes/RU/industry/ru_okpd.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - common
3 | country:
4 | - RU
5 | doc: Russian product code (OKPD) used for government procurement and budget planning
6 | purposes
7 | id: okpd
8 | is_pii: 'False'
9 | langs:
10 | - ru
11 | name: Russian product code (OKPD)
12 | translations:
13 | ru:
14 | doc: Код по Общероссийскому классификатору продукции
15 | name: Код ОКПД
16 | classification: categorical
17 | links: []
18 | examples: []
19 | wikidata_property: ''
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/common/hexcolor.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - common
3 | doc: Color of subject
4 | id: hexcolor
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | name: sRGB color hex triplet (hex color)
9 | links:
10 | - type: wikidata
11 | url: https://www.wikidata.org/wiki/Property:P465
12 | wikidata_property: P465
13 | translations:
14 | ru:
15 | doc: Цвет sRGB в шестнадцатеричной кодировке
16 | name: Цвет в шестнадцатеричной кодировке
17 | semantic_type: color
18 | examples: []
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/useraccounts/skype_username.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | - useraccounts
5 | doc: Person username in Skype
6 | id: skype_username
7 | is_pii: 'True'
8 | parent:
9 | type: username
10 | langs:
11 | - common
12 | name: Skype username
13 | regexp: '[a-zA-Z][a-zA-Z0-9_\-\,\.]{5,31}'
14 | wikidata_property: P2893
15 | links:
16 | - type: wikidata
17 | url: https://www.wikidata.org/wiki/Property:P2893
18 | classification: identifier
19 | examples: []
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/RU/medical/ru_medicinetradename.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - medical
3 | country:
4 | - RU
5 | doc: Trade name of the medicine in Russian.
6 | id: rumedicinetradename
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Medicine trade name (Russian)
11 | translations:
12 | ru:
13 | doc: Торговое название для лекарственных средств
14 | name: Торговое наименование лекарственного средства
15 | classification: identifier
16 | links: []
17 | examples: []
18 | wikidata_property: ''
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/common/color.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - common
3 | doc: Color of subject
4 | id: color
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | name: Color
9 | links:
10 | - type: wikidata
11 | url: https://www.wikidata.org/wiki/Property:P462
12 | - type: schema.org
13 | url: https://schema.org/color
14 | wikidata_property: P462
15 | translations:
16 | ru:
17 | doc: Цвет как физическая характеристика объекта
18 | name: Цвет
19 | classification: categorical
20 | examples: []
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/any/companies/iso6523.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - companies
3 | doc: An organization identifier as defined in ISO 6523(-1)
4 | is_pii: 'False'
5 | langs:
6 | - common
7 | links:
8 | - type: schema.org
9 | url: https://schema.org/iso6523Code
10 | - type: wikipedia
11 | url: https://en.wikipedia.org/wiki/ISO/IEC_6523
12 | id: iso6523code
13 | name: ISO 6523 organization identifier
14 | classification: identifier
15 | examples: []
16 | wikidata_property: ''
17 | translations: {}
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/datatypes/any/internet/eui48.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - internet
3 | - pii
4 | doc: A media access control address (MAC address) is a unique identifier assigned
5 | to a network interface controller (NIC) for use as a network address in communications
6 | within a network segment.
7 | id: eui48
8 | is_pii: 'True'
9 | langs:
10 | - common
11 | name: MAC Address (EUI48) of the device
12 | classification: identifier
13 | links: []
14 | examples: []
15 | wikidata_property: ''
16 | translations: {}
17 | regexp: ''
18 |
--------------------------------------------------------------------------------
/data/datatypes/FR/geo/fr_epcicode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - FR
5 | doc: Etablissements publics de cooperation intercommunale (EPCI). French administrative
6 | geo code
7 | id: epcicode
8 | is_pii: 'False'
9 | langs:
10 | - fr
11 | links:
12 | - type: other
13 | url: https://www.insee.fr/fr/information/2510634
14 | name: Etablissements publics de cooperation intercommunale (EPCI)
15 | classification: identifier
16 | examples: []
17 | wikidata_property: ''
18 | translations: {}
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/RU/geo/ru_feddistrict.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - RU
5 | doc: Name of the federal district of Russia. Full or short.
6 | id: rusfeddistrict
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Russian federal district name
11 | translations:
12 | ru:
13 | doc: Наименование федерального округа России полное или сокращённое
14 | name: Наименование федерального округа России
15 | classification: categorical
16 | links: []
17 | examples: []
18 | wikidata_property: ''
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/RU/government/ru_csrcode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | country:
4 | - RU
5 | doc: Russian budget target item of expenditure code
6 | id: csrcode
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Russian budget target item of expenditure code
11 | translations:
12 | ru:
13 | doc: Целевая статья расходов российского бюджета
14 | name: Целевая статья расходов российского бюджета
15 | classification: categorical
16 | links: []
17 | examples: []
18 | wikidata_property: ''
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/internet/ipv6subnet.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - internet
3 | doc: Range of IPv6 addresses
4 | id: ipv6subnet
5 | is_pii: 'True'
6 | langs:
7 | - common
8 | links:
9 | - type: wikidata
10 | url: https://www.wikidata.org/wiki/Property:P3793
11 | name: IPv6 routing prefix (subnet)
12 | wikidata_property: P3793
13 | regexp: '[\d:a-f\/]{4,30}'
14 | examples:
15 | - value: 2001:610::/29
16 | description: SURFNet
17 | - value: 2001:630:440::/44
18 | description: University of Oxford
19 | translations: {}
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/science/researcherid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - science
3 | - persons
4 | doc: 'ResearcherID is an identifying system for scientific authors. The system was
5 | introduced in January 2008 by Thomson Reuters. '
6 | id: researcherid
7 | is_pii: 'False'
8 | langs:
9 | - common
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/ResearcherID
13 | name: ResearcherID
14 | classification: identifier
15 | examples: []
16 | wikidata_property: ''
17 | translations: {}
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/datatypes/GB/persons/uk_utr.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - GB
6 | doc: A UTR (unique taxpayer reference) number is a 10-digit number completely unique
7 | to each and every UK taxpayer.
8 | id: ukutr
9 | is_pii: 'True'
10 | langs:
11 | - en
12 | links:
13 | - type: other
14 | url: https://www.gov.uk/find-lost-utr-number
15 | name: Unique Taxpayer Reference (UTR)
16 | regexp: '[0-9]{10}'
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/dublincore/dcabstract.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - dublincore
3 | doc: A summary of the resource.
4 | id: dcabstract
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: dublincore
10 | url: http://purl.org/dc/terms/abstract
11 | name: Abstract (Dublin Core)
12 | examples:
13 | - value: A summary of the text.
14 | description: A summary of the text.
15 | translations:
16 | ru:
17 | doc: Краткое изложение ресурса.
18 | name: Краткое изложение
19 | wikidata_property: ''
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/tools/pii/piicatcher.yaml:
--------------------------------------------------------------------------------
1 | id: piicatcher
2 | category: pii
3 | name: PII Catcher
4 | doc: 'PIICatcher is a scanner for PII and PHI information. It finds PII data in your databases and file systems and tracks critical data.'
5 | website: https://github.com/tokern/piicatcher
6 | supported_types:
7 | - email
8 | - birthday
9 | - gender
10 | - nationality
11 | - person_fullname
12 | - person_firstname
13 | - person_lastname
14 | - username
15 | - password
16 | - address
17 | - usssn
18 |
--------------------------------------------------------------------------------
/.idea/metacrafter-registry.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/data/datatypes/AU/finances/au_bsb.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | doc: A bank state branch is the name used in Australia for a bank code, which is a
4 | branch identifier.
5 | id: aubsb
6 | is_pii: 'False'
7 | langs:
8 | - en
9 | country:
10 | - AU
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/Bank_state_branch
14 | regexp: ^[0-9]{3}-?[0-9]{3}$
15 | name: Australia bank state branch (BSB) code
16 | classification: identifier
17 | examples: []
18 | wikidata_property: ''
19 | translations: {}
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/internet/asn.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - internet
3 | doc: autonomous system number
4 | id: asn
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: wikidata
10 | url: https://www.wikidata.org/wiki/Property:P3797
11 | name: Autonomous system number (ASN)
12 | wikidata_property: P3797
13 | regexp: '[1-9]\d*'
14 | examples:
15 | - value: '2532'
16 | description: Library of Congress
17 | - value: '174'
18 | description: Cogent Communications
19 | classification: identifier
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/persons/en_nationality.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | doc: Nationality of the person written in English
5 | id: ennationality
6 | is_pii: 'True'
7 | langs:
8 | - common
9 | name: Person nationality in English
10 | classification: categorical
11 | semantic_type: nationality
12 | examples:
13 | - value: Belorussian
14 | description: Belorussian nationality
15 | - value: Spanish
16 | description: Spanish nationality
17 | links: []
18 | wikidata_property: ''
19 | translations: {}
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/persons/passport.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | doc: Person passport, usually unique personal information that should not be shared online.
5 | Identifies personal document issued to person by government authority
6 | id: passport
7 | is_pii: 'True'
8 | langs:
9 | - common
10 | name: Person passport number (universal)
11 | patterns:
12 | - ruintpassport
13 | - uspassport
14 | classification: identifier
15 | links: []
16 | examples: []
17 | wikidata_property: ''
18 | translations: {}
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/software/imphash.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - software
3 | doc: hash based on PE file imports (MD5 hash)
4 | id: imphash
5 | is_pii: 'False'
6 | parent:
7 | type: md5hash
8 | langs:
9 | - common
10 | links:
11 | - type: other
12 | url: https://developers.virustotal.com/reference/pe_info
13 | name: imphash
14 | examples:
15 | - value: 316cd668ed705c998eae8d3bd7bd168f
16 | description: Virus total file Twain_32.dll
17 | classification: identifier
18 | wikidata_property: ''
19 | translations: {}
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/RU/government/ru_npa.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | country:
4 | - RU
5 | doc: 'Russian regulatory legal act. For example: Executive order'
6 | id: runpa
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Russian regulatory legal act
11 | parent:
12 | type: legislationname
13 | translations:
14 | ru:
15 | doc: Нормативно-правовой документ в России. Например, постановление правительства
16 | name: Нормативно-правовой документ
17 | links: []
18 | examples: []
19 | wikidata_property: ''
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/software/datasize.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - software
3 | doc: size of a software, dataset, neural network, or individual file
4 | id: datasize
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: wikidata
10 | url: https://www.wikidata.org/wiki/Property:P3575
11 | name: Data size
12 | wikidata_property: P3575
13 | examples:
14 | - value: 839 megabyte
15 | description: FreeCAD
16 | - value: 5.15 gigabyte
17 | description: Sly Cooper and the Thievius Raccoonus
18 | translations: {}
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/useraccounts/github_username.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | - useraccounts
5 | doc: Person/organization username in Github
6 | id: github_username
7 | is_pii: 'True'
8 | parent:
9 | type: username
10 | langs:
11 | - common
12 | wikidata_property: P2037
13 | links:
14 | - type: wikidata
15 | url: https://www.wikidata.org/wiki/Property:P2037
16 | name: Github username
17 | regexp: '[0-9A-Za-z]([0-9A-Za-z\-]{0,37}[0-9A-Za-z])?'
18 | classification: identifier
19 | examples: []
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/ES/persons/es_nif_number.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - ES
6 | doc: Spanish tax identification number used to identify tax paying individuals
7 | id: esnif
8 | is_pii: 'True'
9 | langs:
10 | - es
11 | links:
12 | - type: wikipedia
13 | url: https://es.wikipedia.org/wiki/N%C3%BAmero_de_identificaci%C3%B3n_fiscal
14 | regexp: '[0-9]?[0-9]{7}[-]?[A-Z]'
15 | name: Spanish Tax identification number
16 | classification: identifier
17 | examples: []
18 | wikidata_property: ''
19 | translations: {}
20 |
--------------------------------------------------------------------------------
/data/datatypes/RU/companies/ru_okogu.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - common
3 | country:
4 | - RU
5 | doc: Russian government and governance code. OKOGU classifier
6 | id: okogu
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Russian government and governance code (OKOGU)
11 | translations:
12 | ru:
13 | doc: ОКОГУ - это общероссийский классификатор органов государственной власти
14 | и управления
15 | name: Код ОКОГУ
16 | classification: categorical
17 | links: []
18 | examples: []
19 | wikidata_property: ''
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/geo/geonamesid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | doc: identifier in the GeoNames geographical database
4 | id: geonamesid
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: wikidata
10 | url: https://www.wikidata.org/wiki/Property:P1566
11 | name: Geonames ID
12 | wikidata_property: P1566
13 | regexp: '[1-9][0-9]{0,8}'
14 | examples:
15 | - value: '935877'
16 | description: Piton de la Fournaise
17 | - value: '1277082'
18 | description: Baranagar
19 | classification: identifier
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/internet/ipv4.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - internet
3 | - pii
4 | doc: Internet Protocol version 4 (IPv4) is the fourth version of the Internet Protocol
5 | (IP). It is one of the core protocols of standards-based internetworking methods
6 | in the Internet and other packet-switched networks.
7 | id: ipv4
8 | is_pii: 'True'
9 | langs:
10 | - common
11 | name: Internet Protocol version 4 (IPv4)
12 | classification: identifier
13 | links: []
14 | examples: []
15 | wikidata_property: ''
16 | translations: {}
17 | regexp: ''
18 |
--------------------------------------------------------------------------------
/data/tools/other/soda.yaml:
--------------------------------------------------------------------------------
1 | id: soda
2 | category: other
3 | name: Soda
4 | doc: 'Soda allows everyone on your data team to find, analyze, and resolve data issues. Our open-source tools and data observability platform bring everyone closer to the data, resulting in data products that you can trust.'
5 | website: https://docs.soda.io/soda-sql/sql_metrics.html#valid-format-values
6 | supported_types:
7 | - date
8 | - email
9 | - ipv4
10 | - ipv6
11 | - ipaddr
12 | - time
13 | - phone
14 | - uuid
15 |
16 |
--------------------------------------------------------------------------------
/data/datatypes/ES/geo/es_postcode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - ES
5 | - MX
6 | doc: Spanish postal code (by language)
7 | id: espostcode
8 | langs:
9 | - es
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/Postal_codes_in_Spain
13 | name: Spanish postal code (by language)
14 | semantic_type: postindex
15 | regexp: \d{5}
16 | examples:
17 | - value: '07002'
18 | description: Palma, Majorca
19 | - value: '27722'
20 | description: Veigas, Asturias
21 | wikidata_property: ''
22 | translations: {}
23 |
--------------------------------------------------------------------------------
/data/datatypes/RU/medical/ru_medicineregnum.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - medical
3 | country:
4 | - RU
5 | doc: Russian medicine registration code
6 | id: rumedicineregnum
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Registration code of medicine (Russian)
11 | translations:
12 | ru:
13 | doc: Код регистрации лекарственного средства в России. Реестр Росздравнадзора
14 | name: Код регистрации лекарственного средства в России
15 | classification: identifier
16 | links: []
17 | examples: []
18 | wikidata_property: ''
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/datetime.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: Date and time with one of known notations
4 | id: datetime
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: wikipedia
10 | url: https://en.wikipedia.org/wiki/Date_and_time_notation
11 | - type: schema.org
12 | url: https://schema.org/DateTime
13 | name: Date and time
14 | translations:
15 | ru:
16 | doc: Дата и время в одной из общепринятых нотаций
17 | name: Дата и время
18 | examples: []
19 | wikidata_property: ''
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/software/telfhash.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - software
3 | doc: Files Trend Micro ELF Hash (aka telfhash)
4 | id: telfhash
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: other
10 | url: https://developers.virustotal.com/reference/file-telfhash
11 | name: telfhash
12 | examples:
13 | - value: t167319f0a1c160d81cb547cbc383bfae309821ae56faaa74dba48b425b7f51c1903f5f5
14 | description: Virus total file libmongocrypt.so
15 | classification: identifier
16 | wikidata_property: ''
17 | translations: {}
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/datatypes/any/useraccounts/instagram_username.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | - useraccounts
5 | doc: Person username in Instagram
6 | id: instagram_username
7 | is_pii: 'True'
8 | parent:
9 | type: username
10 | langs:
11 | - common
12 | wikidata_property: P2003
13 | links:
14 | - type: wikidata
15 | url: https://www.wikidata.org/wiki/Property:P2003
16 | name: Instagram username
17 | regexp: ([0-9a-z_](?:(?:[0-9a-z_]|(?:\.(?!\.))){0,28}(?:[0-9a-z_]))?)
18 | classification: identifier
19 | examples: []
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/CA/persons/ca_sin.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - CA
6 | doc: 'Social insurance number (SIN) is a number issued in Canada to administer various
7 | government programs. '
8 | id: casin
9 | is_pii: 'True'
10 | langs:
11 | - en
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/Social_insurance_number
15 | regexp: (\d{3}-\d{3}-\d{3})|(\d{9})
16 | name: Canada social insurance number (SIN)
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/RU/companies/ru_okopfname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - companies
3 | country:
4 | - RU
5 | doc: Type of business entity in Russia, classifier OKOPF
6 | id: okopfname
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Type of business entity (Russian, OKOPF)
11 | translations:
12 | ru:
13 | doc: Наименование организационно-правовой формы юридического лица по справочнику
14 | ОКОПФ
15 | name: Организационно-правовая форма
16 | classification: categorical
17 | links: []
18 | examples: []
19 | wikidata_property: ''
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/identifiers/mongodbid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - common
3 | doc: MongoDB unique id assigned to each row of each collection in MongoDB as field
4 | _id
5 | id: mongodbid
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | name: MongoDB unique Object ID
10 | examples:
11 | - value: 507f1f77bcf86cd799439011
12 | description: Unique MongoDB ObjectID
13 | - value: 507f191e810c19729de860ea
14 | description: Unique MongoDB ObjectID
15 | classification: identifier
16 | links: []
17 | wikidata_property: ''
18 | translations: {}
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/FR/geo/fr_postcode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - FR
5 | doc: French postal code
6 | id: frpostcode
7 | langs:
8 | - fr
9 | links:
10 | - type: wikipedia
11 | url: https://en.wikipedia.org/wiki/Postal_codes_in_France
12 | - type: other
13 | url: https://www.wikidata.org/wiki/Q1105640
14 | name: French postal code
15 | semantic_type: postindex
16 | regexp: \d{5}
17 | examples:
18 | - value: '75008'
19 | description: Paris
20 | - value: '97439'
21 | description: Sainte-Rose
22 | wikidata_property: ''
23 | translations: {}
24 |
--------------------------------------------------------------------------------
/data/datatypes/any/finances/iso4217curcode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | doc: ISO 4217 Currency code
4 | id: iso4217curcode
5 | langs:
6 | - common
7 | links:
8 | - type: other
9 | url: https://www.iso.org/iso-4217-currency-codes.html
10 | - type: wikidata
11 | url: https://www.wikidata.org/wiki/Property:P498
12 | name: ISO 4217 Currency code
13 | semantic_type: currency
14 | wikidata_property: P498
15 | regexp: '[A-Z]{3}'
16 | examples:
17 | - value: XPD
18 | description: palladium
19 | - value: EUR
20 | description: euro
21 | translations: {}
22 |
--------------------------------------------------------------------------------
/data/datatypes/any/telecom/imsi.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - telecom
3 | - pii
4 | doc: The international mobile subscriber identity (IMSI) is a number that uniquely
5 | identifies every user of a cellular network.
6 | id: imsi
7 | is_pii: 'True'
8 | links:
9 | - type: wikipedia
10 | url: https://en.wikipedia.org/wiki/International_mobile_subscriber_identity
11 | langs:
12 | - common
13 | name: The international mobile subscriber identity (IMSI)
14 | classification: identifier
15 | examples: []
16 | wikidata_property: ''
17 | translations: {}
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/tools/other/auctus.yaml:
--------------------------------------------------------------------------------
1 | id: auctus
2 | category: other
3 | name: Auctus
4 | doc: 'This project is a web crawler and search engine for datasets, specifically meant for data augmentation tasks in machine learning. It is able to find datasets in different repositories and index them for later retrieval.'
5 | website: https://gitlab.com/ViDA-NYU/auctus/auctus
6 | supported_types:
7 | - datetime
8 | - id
9 | - latitude
10 | - longitude
11 | - geopoint
12 | - address
13 | - url
14 | - filename
15 | - boolean
16 |
17 |
--------------------------------------------------------------------------------
/data/datatypes/GB/geo/uk_postalcode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - GB
5 | doc: Postal codes used in the United Kingdom, British Overseas Territories and Crown
6 | dependencies are known as postcodes (originally, postal codes).
7 | id: ukpostalcode
8 | langs:
9 | - en
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/Postcodes_in_the_United_Kingdom
13 | name: UK Postal code
14 | semantic_type: postindex
15 | classification: categorical
16 | examples: []
17 | wikidata_property: ''
18 | translations: {}
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/RU/companies/ru_ikuiko.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | - companies
4 | country:
5 | - RU
6 | doc: Unique code assigned to all government customers in Russian Federation
7 | id: ruikuiko
8 | is_pii: 'False'
9 | langs:
10 | - ru
11 | name: Russian government customers unique code
12 | translations:
13 | ru:
14 | doc: Код, присваиваемый государственным заказчикам в Российской Федерации
15 | name: Идентификационный код заказчика
16 | classification: identifier
17 | links: []
18 | examples: []
19 | wikidata_property: ''
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/RU/companies/ru_ikz.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | - companies
4 | country:
5 | - RU
6 | doc: Unique id assigned to all government procurement procedures in Russian Federation
7 | id: ruikz
8 | is_pii: 'False'
9 | langs:
10 | - ru
11 | name: Russian government procurement unique id
12 | translations:
13 | ru:
14 | doc: Код, присваиваемый государственным закупкам в Российской Федерации
15 | name: Идентификационный код закупки
16 | classification: identifier
17 | links: []
18 | examples: []
19 | wikidata_property: ''
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/cryptocurrency/wifaddrcomp.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - cryptocurrency
3 | id: wifaddrcomp
4 | doc: WIF, compressed pubkey
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: other
10 | url: https://allprivatekeys.com/bitcoin-address-format
11 | name: WIF, compressed pubkey
12 | examples:
13 | - value: L1aW4aubDFB7yfras2S1mN3bqg9nwySY8nkoLmJebSLD5BWv3ENZ
14 | description: Example from https://allprivatekeys.com/bitcoin-address-format
15 | classification: identifier
16 | wikidata_property: ''
17 | translations: {}
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/datatypes/any/internet/ipv6.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - internet
3 | - pii
4 | doc: Internet Protocol version 6 (IPv6) is the most recent version of the Internet
5 | Protocol (IP), the communications protocol that provides an identification and location
6 | system for computers on networks and routes traffic across the Internet.
7 | id: ipv6
8 | is_pii: 'True'
9 | langs:
10 | - common
11 | name: Internet Protocol version 6 (IPv6)
12 | classification: identifier
13 | links: []
14 | examples: []
15 | wikidata_property: ''
16 | translations: {}
17 | regexp: ''
18 |
--------------------------------------------------------------------------------
/data/datatypes/RU/companies/ru_okopf.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - companies
3 | - common
4 | country:
5 | - RU
6 | doc: Code of type of business entity in Russia, classifier OKOPF
7 | id: okopf
8 | is_pii: 'False'
9 | langs:
10 | - ru
11 | name: Code of type of business entity (Russian, OKOPF)
12 | translations:
13 | ru:
14 | doc: Код организационно-правовой формы юридического лица по справочнику ОКОПФ
15 | name: Код организационно-правовой формы по ОКОПФ
16 | classification: categorical
17 | links: []
18 | examples: []
19 | wikidata_property: ''
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/US/industry/us_naicscode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - industry
3 | country:
4 | - US
5 | doc: Classification in the North American Industry Classification System
6 | id: naicscode
7 | is_pii: 'False'
8 | links:
9 | - type: wikidata
10 | url: https://www.wikidata.org/wiki/Property:P3224
11 | langs:
12 | - en
13 | name: NAICS code
14 | wikidata_property: P3224
15 | regexp: \d{2,6}
16 | examples:
17 | - value: '517919'
18 | description: CelerSMS
19 | - value: '512240'
20 | description: recording studio
21 | classification: categorical
22 | translations: {}
23 |
--------------------------------------------------------------------------------
/data/datatypes/any/cryptocurrency/wifaddruncomp.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - cryptocurrency
3 | id: wifaddruncomp
4 | doc: WIF, uncompressed pubkey
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: other
10 | url: https://allprivatekeys.com/bitcoin-address-format
11 | name: WIF, uncompressed pubkey
12 | examples:
13 | - value: 5Hwgr3u458GLafKBgxtssHSPqJnYoGrSzgQsPwLFhLNYskDPyyA
14 | description: Example from https://allprivatekeys.com/bitcoin-address-format
15 | classification: identifier
16 | wikidata_property: ''
17 | translations: {}
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/datatypes/any/geo/continent.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | doc: A continent is any of several large landmasses. Related to UN unm49 dataclass
4 | id: continent
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: wikipedia
10 | url: https://en.wikipedia.org/wiki/Continent
11 | - type: wikidata
12 | url: https://www.wikidata.org/wiki/Property:P30
13 | wikidata_property: P30
14 | examples:
15 | - value: Eurasia
16 | description: Eurasia
17 | name: Continent name or identifier
18 | classification: categorical
19 | translations: {}
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/government/legislationname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | doc: A legal document such as an act, decree, bill, etc. (enforceable or not) or a
4 | component of a legal act (like an article).
5 | id: legislationname
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: schema.org
11 | url: https://schema.org/Legislation
12 | name: Legislation name
13 | translations:
14 | ru:
15 | doc: Название нормативного документа
16 | name: Название нормативного документа
17 | examples: []
18 | wikidata_property: ''
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/langs.yaml:
--------------------------------------------------------------------------------
1 | - id: common
2 | name: Common to most languages
3 | - id: en
4 | name: English
5 | - id: ru
6 | name: Russian
7 | - id: fr
8 | name: French
9 | - id: es
10 | name: Spanish
11 | - id: it
12 | name: Italian
13 | - id: pt
14 | name: Portuguese
15 | - id: da
16 | name: Danish
17 | - id: fi
18 | name: Finnish
19 | - id: se
20 | name: Swedish
21 | - id: th
22 | name: Thai
23 | - id: de
24 | name: German
25 |
26 | # Added to support languages referenced in datatype definitions
27 | - id: nl
28 | name: Dutch
--------------------------------------------------------------------------------
/data/datatypes/any/science/openalexid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - science
3 | - identifiers
4 | doc: Identifier for works, authors, institutes, venues, concepts/subjects in OpenAlex
5 | id: openalexid
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: other
11 | url: https://docs.openalex.org/about-the-data#the-openalex-id
12 | - type: wikidata
13 | url: https://www.wikidata.org/wiki/Property:P10283
14 | name: OpenAlex ID
15 | regexp: '[ACIVW][1-9]\d{3,9}'
16 | wikidata_property: P10283
17 | classification: identifier
18 | examples: []
19 | translations: {}
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/software/tlsh.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - software
3 | doc: tlsh is a hash used by Trend Micro which can be used for similarity comparisons.
4 | id: tlsh
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: other
10 | url: https://developers.virustotal.com/reference/files-tlsh
11 | name: TLSH hash
12 | examples:
13 | - value: T18F535B52F19146B7CD502278DF2CEB3199BFE134871816E3634882B6576B0D1AB7E3CA
14 | description: Virus total file Twain_32.dll
15 | classification: identifier
16 | wikidata_property: ''
17 | translations: {}
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/datatypes/GB/geo/uk_wardcode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - GB
5 | doc: The wards and electoral divisions in the United Kingdom are electoral districts
6 | at sub-national level represented by one or more councillors.
7 | id: ukwardcode
8 | is_pii: 'False'
9 | langs:
10 | - en
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/Wards_and_electoral_divisions_of_the_United_Kingdom
14 | name: Ward code (United Kingdom)
15 | classification: categorical
16 | examples: []
17 | wikidata_property: ''
18 | translations: {}
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/GB/geo/uk_wardname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - GB
5 | doc: The wards and electoral divisions in the United Kingdom are electoral districts
6 | at sub-national level represented by one or more councillors.
7 | id: ukwardname
8 | is_pii: 'False'
9 | langs:
10 | - en
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/Wards_and_electoral_divisions_of_the_United_Kingdom
14 | name: Ward name (United Kingdom)
15 | classification: categorical
16 | examples: []
17 | wikidata_property: ''
18 | translations: {}
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/US/telecom/fccid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - telecom
3 | doc: Identifier of a product assigned by the grantee in an application to the Federal
4 | Communications Commission of the United States
5 | id: fccid
6 | is_pii: 'False'
7 | country:
8 | - US
9 | links:
10 | - type: wikidata
11 | url: https://www.wikidata.org/wiki/Property:P7290
12 | langs:
13 | - en
14 | wikidata_property: P7290
15 | name: FCC Product Code (FCC ID)
16 | examples:
17 | - value: A1395
18 | description: iPad 2, model A1395
19 | classification: identifier
20 | translations: {}
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/utctimezoneoffiset.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: Difference between Coordinated Universal Time (UTC) and this timezone.
4 | id: utctimezoneoffset
5 | langs:
6 | - common
7 | links:
8 | - type: wikidata
9 | url: https://www.wikidata.org/wiki/Property:P2907
10 | name: UTC Timezone offset
11 | semantic_type: timezone
12 | wikidata_property: P2907
13 | regexp: ^UTC(?:Z|[+-](?:2[0-3]|[01][0-9]):[0-5][0-9])$
14 | examples:
15 | - value: UTC+08:00
16 | description: 8 hour
17 | - value: UTC+01:24
18 | description: 1.4 hour
19 | translations: {}
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/software/vhash.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - software
3 | doc: VirusTotal in-house similarity clustering algorithm value, based on a simple
4 | structural feature hash, which allows you to find similar files.
5 | id: vhash
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: other
11 | url: https://developers.virustotal.com/reference/files
12 | name: VHash
13 | examples:
14 | - value: 1640566d1555156az3b26kz1fez5
15 | description: Virus total file Twain_32.dll
16 | classification: identifier
17 | wikidata_property: ''
18 | translations: {}
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/FR/persons/fr_cni.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - FR
6 | doc: The French national identity card (CNI) is an official identity document consisting
7 | of an electronic ID-1 card bearing a photograph, name and address.
8 | id: frcni
9 | is_pii: 'True'
10 | langs:
11 | - fr
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/National_identity_card_(France)
15 | regexp: ^[0-9]{12}$
16 | name: French national identity card
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/FR/geo/fr_siretcode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - FR
5 | doc: 'The SIRET code (French: Système d’identification du répertoire des établissements),
6 | or SIRET number, is an INSEE code which allows the geographic identification
7 | of any French establishment or business. '
8 | id: siretcode
9 | is_pii: 'False'
10 | langs:
11 | - fr
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/SIRET_code
15 | name: SIRET Code
16 | classification: categorical
17 | examples: []
18 | wikidata_property: ''
19 | translations: {}
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/RU/government/ru_budgetname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | country:
4 | - RU
5 | doc: Name of the budget of Russian government at federal, regional or local level
6 | id: budgetname
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Russian government budget name
11 | translations:
12 | ru:
13 | doc: Название бюджета по справочнику наименований и кодов бюджета ведомом Минфином
14 | России и Федеральным казначейством.
15 | name: Наименование бюджета
16 | classification: categorical
17 | links: []
18 | examples: []
19 | wikidata_property: ''
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/companies/en_companyname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - companies
3 | doc: English written company/business/legal name of business entity
4 | id: encompanyname
5 | langs:
6 | - en
7 | name: Company/business name in English
8 | semantic_type: orgname
9 | examples:
10 | - value: AARDMAN ANIMATIONS LIMITED
11 | description: AARDMAN ANIMATIONS LIMITED
12 | - value: ULSTER INNOVATION FUND LP
13 | description: ULSTER INNOVATION FUND LP
14 | - value: GREEN HILL TRUST
15 | description: GREEN HILL TRUST
16 | links: []
17 | wikidata_property: ''
18 | translations: {}
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/persons/jobtitle.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - persons
3 | - companies
4 | country:
5 | - RU
6 | doc: Work positions / job title
7 | id: jobtitle
8 | is_pii: 'False'
9 | langs:
10 | - common
11 | links:
12 | - type: schema.org
13 | url: https://schema.org/jobTitle
14 | name: Work position / Job title
15 | patterns:
16 | - rusworkposition
17 | translations:
18 | ru:
19 | doc: Должность персоны в организации
20 | name: Должность
21 | examples:
22 | - value: Aid worker/humanitarian worker
23 | description: Aid worker job title
24 | wikidata_property: ''
25 | regexp: ''
26 |
--------------------------------------------------------------------------------
/data/datatypes/AU/companies/au_acn.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - companies
3 | country:
4 | - AU
5 | doc: An Australian Company Number (usually shortened to ACN) is a unique identifier
6 | required by every company registered under Australia’s Corporations Act 2001 (Cth).
7 | id: auacn
8 | is_pii: 'False'
9 | langs:
10 | - en
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/Australian_Company_Number
14 | regexp: \d{3}\s\d{3}\s\d{3}
15 | name: Australian Company Number (ACN)
16 | classification: identifier
17 | examples: []
18 | wikidata_property: ''
19 | translations: {}
20 |
--------------------------------------------------------------------------------
/data/datatypes/RU/companies/ru_inn.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - companies
3 | country:
4 | - RU
5 | doc: Russian organization/person tax identification number (INN) is unique for each
6 | person and organization.
7 | id: inn
8 | is_pii: 'False'
9 | langs:
10 | - ru
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/VAT_identification_number
14 | name: INN (Russian tax identifier)
15 | regexp: \d{10}
16 | examples:
17 | - value: ''
18 | description: ''
19 | - value: ''
20 | description: ''
21 | classification: identifier
22 | wikidata_property: ''
23 | translations: {}
24 |
--------------------------------------------------------------------------------
/data/datatypes/US/finances/us_bankaccount.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | country:
4 | - US
5 | doc: A bank account in a financial institution in the United States
6 | id: usbankaccount
7 | is_pii: 'False'
8 | langs:
9 | - common
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/Bank_account
13 | name: Bank account in United States
14 | regexp: '[0-9]{8,17}'
15 | translations:
16 | ru:
17 | doc: Вид банковского счета открываемого в банках США
18 | name: Банковский счет в банке США
19 | semantic_type: bankaccount
20 | examples: []
21 | wikidata_property: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/ianatimezoneid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: Identifier of a time zone in the IANA time zone database (tz database).
4 | id: ianatimezoneid
5 | langs:
6 | - common
7 | links:
8 | - type: wikidata
9 | url: https://www.wikidata.org/wiki/Property:P6687
10 | name: IANA Timezone ID
11 | semantic_type: timezone
12 | wikidata_property: P6687
13 | regexp: ^[A-Za-z]+(?:/[A-Za-z0-9_+-]+)+$
14 | examples:
15 | - value: Australia/Sydney
16 | description: Australia/Sydney
17 | - value: Europe/London
18 | description: Europe/London
19 | translations: {}
20 |
--------------------------------------------------------------------------------
/data/datatypes/BR/persons/br_cpf.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - BR
6 | doc: The CPF number (Cadastro de Pessoas Fisicas; Portuguese for Natural Persons Register)
7 | is the Brazilian individual taxpayer registry, since its creation in 1965.
8 | id: brcpf
9 | is_pii: 'True'
10 | langs:
11 | - pt
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/CPF_number
15 | regexp: \d{3}\.\d{3}\.\d{3}\-\d{2}
16 | name: CPF number (Cadastro de Pessoas Fisicas)
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/FI/persons/fi_natid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - FI
6 | doc: The Finnish identity card is one of two official identity documents in Finland,
7 | the other being the Finnish passport.
8 | id: fiidcard
9 | is_pii: 'True'
10 | langs:
11 | - fi
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/Finnish_identity_card
15 | regexp: ^(0[1-9]|[1-2][0-9]|3[0-1])(0[1-9]|1[0-2])[0-9]{2}[a+-][0-9]{3}[A-Za-z0-9]$
16 | name: Finnish identity card
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/GB/finances/uk_sedol.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | doc: SEDOL stands for Stock Exchange Daily Official List, a list of security identifiers
4 | used in the United Kingdom and Ireland for clearing purposes.
5 | id: sedol
6 | is_pii: 'False'
7 | country:
8 | - GB
9 | - IE
10 | langs:
11 | - common
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/SEDOL
15 | name: SEDOL identifier
16 | regexp: '[0-9BCDFGHJKLMNPQRSTVWXYZ]{6}[0-9]'
17 | examples:
18 | - value: '0263494'
19 | description: BAE Systems
20 | classification: identifier
21 | wikidata_property: ''
22 | translations: {}
23 |
--------------------------------------------------------------------------------
/data/datatypes/any/finances/currency.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | doc: A currency in the most specific sense is money in any form when in use or circulation
4 | as a medium of exchange, especially circulating banknotes and coins.
5 | id: currency
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: wikipedia
11 | url: https://en.wikipedia.org/wiki/Currency
12 | name: Currency
13 | patterns:
14 | - iso4217curcode
15 | - iso4217curname
16 | - okvname
17 | - okvcode
18 | classification: categorical
19 | examples: []
20 | wikidata_property: ''
21 | translations: {}
22 | regexp: ''
23 |
--------------------------------------------------------------------------------
/data/datatypes/any/internet/tld.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - internet
3 | doc: A top-level domain (TLD) is one of the domains at the highest level in the hierarchical
4 | Domain Name System of the Internet after the root domain. The top-level domain
5 | names are installed in the root zone of the name space.
6 | id: tld
7 | is_pii: 'False'
8 | langs:
9 | - en
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/Top-level_domain
13 | name: Top level domain (TLD)
14 | classification: identifier
15 | examples: []
16 | wikidata_property: ''
17 | translations: {}
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/datatypes/any/science/academicdegree.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - persons
3 | - science
4 | doc: Academic degree title
5 | id: academicdegree
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: wikipedia
11 | url: https://en.wikipedia.org/wiki/Academic_degree
12 | name: Academic degree
13 | patterns:
14 | - rusdegree
15 | translations:
16 | ru:
17 | doc: Научная степень персоны на русском языке. Например, кандидат технических
18 | наук
19 | name: Научная степень (на русском)
20 | classification: categorical
21 | examples: []
22 | wikidata_property: ''
23 | regexp: ''
24 |
--------------------------------------------------------------------------------
/data/datatypes/any/geo/wbregion.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | doc: World Bank regions are territorial classification of countries
4 | id: wbregion
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: other
10 | url: https://datahelpdesk.worldbank.org/knowledgebase/articles/906519-world-bank-country-and-lending-groups
11 | wikidata_property: P30
12 | examples:
13 | - value: South Asia
14 | description: South Asia
15 | - value: Europe & Central Asia
16 | description: Europe & Central Asia
17 | name: World Bank region
18 | classification: categorical
19 | translations: {}
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/telecom/imei.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - telecom
3 | - pii
4 | doc: The International Mobile Equipment Identity (IMEI) is a numeric identifier, usually
5 | unique for 3GPP and iDEN mobile phones, as well as some satellite phones.
6 | id: imei
7 | is_pii: 'True'
8 | links:
9 | - type: wikipedia
10 | url: https://en.wikipedia.org/wiki/International_Mobile_Equipment_Identity
11 | langs:
12 | - common
13 | regexp: \d{2}-\d{6}-\d{6}-\d\d?
14 | name: International Mobile Equipment Identity (IMEI)
15 | classification: identifier
16 | examples: []
17 | wikidata_property: ''
18 | translations: {}
19 |
--------------------------------------------------------------------------------
/data/datatypes/BE/persons/be_natcardnum.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - BE
6 | doc: 'The Belgian identity card is a national identity card issued to all citizens of
7 | Belgium aged 12 years old and above. '
8 | id: benatcardid
9 | is_pii: 'True'
10 | langs:
11 | - fr
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/Belgian_identity_card
15 | name: Belgium national card ID
16 | regexp: '[0-9]{2}\.(0[1-9]|1[0-2])\.(0[1-9]|[1-2][0-9]|3[0-1])-[0-9]{3}\.[0-9]{2}'
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/cryptography/sct.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - cryptography
3 | doc: Signed certificate timestamp (SCT). The SCT is the log's promise to incorporate
4 | the certificate in the Merkle Tree within a fixed amount of time known as the Maximum
5 | Merge Delay (MMD).
6 | id: sctcrypto
7 | is_pii: 'False'
8 | langs:
9 | - common
10 | links:
11 | - type: other
12 | url: https://www.rfc-editor.org/rfc/rfc6962.html
13 | regexp: ^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$
14 | name: Signed certificate timestamp (SCT)
15 | examples: []
16 | wikidata_property: ''
17 | translations: {}
18 |
--------------------------------------------------------------------------------
/data/datatypes/any/objectids/wikidataid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - objectids
3 | doc: Wikidata makes use of identifiers for both internal organization of the knowledge
4 | base and for its connection to other databases.
5 | id: wikidataid
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: wikidata
11 | url: https://www.wikidata.org/wiki/Wikidata:Identifiers
12 | name: Wikidata Id
13 | examples:
14 | - value: Q12345
15 | description: Count von Count
16 | - value: Q234511
17 | description: Gurk
18 | classification: identifier
19 | wikidata_property: ''
20 | translations: {}
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/schemes/tool.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": {
3 | "required": true,
4 | "type": "string"
5 | },
6 | "id": {
7 | "required": true,
8 | "type": "string"
9 | },
10 | "doc": {
11 | "required": true,
12 | "type": "string"
13 | },
14 | "category": {
15 | "required": true,
16 | "type": "string"
17 | },
18 | "website": {
19 | "required": false,
20 | "type": "string"
21 | },
22 | "supported_types": {
23 | "required": false,
24 | "type": "list"
25 | }
26 | }
27 |
28 |
--------------------------------------------------------------------------------
/data/datatypes/GB/geo/uk_toid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - GB
5 | doc: TOpographic IDentifier assigned by the Ordnance Survey to identify a feature
6 | in Great Britain
7 | id: toid
8 | is_pii: 'False'
9 | langs:
10 | - en
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/TOID
14 | - type: wikidata
15 | url: https://www.wikidata.org/wiki/Property:P3120
16 | wikidata_property: P3120
17 | name: UK TOpographic IDentifier (TOID)
18 | classification: identifier
19 | examples:
20 | - value: '7000000000013965'
21 | description: Surrey
22 | regexp: \d{16}
23 | translations: {}
24 |
--------------------------------------------------------------------------------
/data/datatypes/SG/persons/sg_nric.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - SG
6 | doc: The National Registration Identity Card (NRIC) is the compulsory identity document
7 | issued to citizens and permanent residents of Singapore.
8 | id: sgnric
9 | is_pii: 'True'
10 | langs:
11 | - en
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/National_Registration_Identity_Card
15 | regexp: (?i)([STFG][0-9]{7}[A-Z])
16 | name: The Singapore National Registration Identity Card (NRIC)
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/tools/pii/scrubadub.yaml:
--------------------------------------------------------------------------------
1 | id: scrubadub
2 | category: pii
3 | name: scrubadub
4 | doc: 'Remove personally identifiable information from free text. Sometimes we have additional metadata about the people we wish to anonymize. Other times we do not. This package makes it easy to seamlessly scrub personal information from free text, without compromising the privacy of the people we are trying to protect.'
5 | website: https://github.com/LeapBeyond/scrubadub
6 | supported_types:
7 | - email
8 | - phone
9 | - birthday
10 | - bankcard
11 | - postindex
12 | - address
13 | - person_fullname
14 |
--------------------------------------------------------------------------------
/data/datatypes/AU/companies/au_abn.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - companies
3 | country:
4 | - AU
5 | doc: The Australian Business Number (ABN) is a unique 11-digit identifier issued by
6 | the Australian Business Register (ABR) which is operated by the Australian Taxation
7 | Office (ATO).
8 | id: auabn
9 | is_pii: 'False'
10 | langs:
11 | - en
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/Australian_Business_Number
15 | regexp: \d{2}\s\d{3}\s\d{3}\s\d{3}
16 | name: Australian Business Number (ABN)
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/AU/persons/au_tfn_number.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - AU
6 | doc: The tax file number (TFN) is a unique identifier issued by the Australian Taxation
7 | Office to each taxpaying entity — an individual, company, superannuation fund, partnership,
8 | or trust.
9 | id: autfn
10 | is_pii: 'True'
11 | langs:
12 | - en
13 | links:
14 | - type: wikipedia
15 | url: https://en.wikipedia.org/wiki/Tax_file_number
16 | regexp: \d{3}\s\d{3}\s\d{3}
17 | name: Australian Tax File Number (TFN)
18 | classification: identifier
19 | examples: []
20 | wikidata_property: ''
21 | translations: {}
22 |
--------------------------------------------------------------------------------
/data/datatypes/EU/industry/eu_cpvcode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - industry
3 | country:
4 | - EU
5 | doc: massively multilingual and public domain taxonomy legislated by the EU for goods
6 | and services
7 | id: cpvcode
8 | is_pii: 'False'
9 | links:
10 | - type: wikidata
11 | url: https://www.wikidata.org/wiki/Property:P5417
12 | langs:
13 | - en
14 | name: CPV (Common Procurement Vocabulary) code
15 | wikidata_property: P5417
16 | regexp: \d{8}
17 | examples:
18 | - value: '24110000'
19 | description: industrial gas
20 | - value: '24111700'
21 | description: nitrogen
22 | classification: categorical
23 | translations: {}
24 |
--------------------------------------------------------------------------------
/data/datatypes/GB/medical/uk_bnfcode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - medical
3 | country:
4 | - GB
5 | doc: The British National Formulary (BNF) is a reference book containing the standard
6 | list of medicines used in UK prescribing. It gives information on the indications,
7 | dosages and side effects for over 70,000 medicines.
8 | id: ukbnfcode
9 | is_pii: 'False'
10 | langs:
11 | - en
12 | links:
13 | - type: other
14 | url: https://www.thedatalab.org/blog/2017/04/prescribing-data-bnf-codes/
15 | name: BNF code
16 | classification: identifier
17 | examples: []
18 | wikidata_property: ''
19 | translations: {}
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/GB/persons/uk_nino.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - GB
6 | doc: The National Insurance number is a number used in the United Kingdom in the administration
7 | of the National Insurance or social security system. It is also used for some purposes
8 | in the UK tax system.
9 | id: uknino
10 | is_pii: 'True'
11 | langs:
12 | - en
13 | links:
14 | - type: wikipedia
15 | url: https://en.wikipedia.org/wiki/National_Insurance_number
16 | name: UK National Insurance number
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/RU/companies/ru_kpp.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - companies
3 | country:
4 | - RU
5 | doc: Russian company supplemental registration code (KPP). Used in combination with
6 | Taxpayer INN code
7 | id: kpp
8 | is_pii: 'False'
9 | langs:
10 | - ru
11 | name: Russian company supplemental registration code (KPP)
12 | translations:
13 | ru:
14 | doc: Вспомогательный код используемый вместе с кодом ИНН для идентификации налоговой
15 | принадлежности компании.
16 | name: Код постановки на учёт (КПП)
17 | classification: identifier
18 | links: []
19 | examples: []
20 | wikidata_property: ''
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/US/persons/us_npi.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - US
6 | doc: A National Provider Identifier (NPI) is a unique 10-digit identification number
7 | issued to health care providers in the United States by the Centers for Medicare
8 | and Medicaid Services (CMS).
9 | id: usnpi
10 | langs:
11 | - en
12 | is_pii: 'True'
13 | links:
14 | - type: wikipedia
15 | url: https://en.wikipedia.org/wiki/National_Provider_Identifier
16 | name: US National Provider Identifier (NPI)
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/any/shipping/s10upu.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - objectids
3 | - shipping
4 | doc: The UPU S10 standard defines a system for assigning 13-character identifiers
5 | to international postal items for the purpose of tracking and tracing them during
6 | shipping.
7 | id: s10upu
8 | is_pii: 'False'
9 | langs:
10 | - common
11 | parent:
12 | type: tracknum
13 | links:
14 | - type: wikipedia
15 | url: https://en.wikipedia.org/wiki/S10_(UPU_standard)
16 | regexp: ^[A-Za-z]{2}\d{8}\d{1}[A-Za-z]{2}$
17 | name: S10 (UPU standard)
18 | classification: identifier
19 | examples: []
20 | wikidata_property: ''
21 | translations: {}
22 |
--------------------------------------------------------------------------------
/data/tools/other/metabase.yaml:
--------------------------------------------------------------------------------
1 | id: metabase
2 | category: other
3 | name: Metabase
4 | doc: 'Metabase is a simple and powerful analytics tool which lets anyone learn and make decisions from their company''s data, no technical knowledge required.'
5 | website: https://www.metabase.com/docs/latest/users-guide/field-types.html
6 | supported_types:
7 | - city
8 | - country
9 | - latitude
10 | - longitude
11 | - usstate
12 | - postindex
13 | - birthday
14 | - company
15 | - email
16 | - username
17 | - datetime
18 | - timestamp
19 | - time
20 | - date
21 | - url
22 |
23 |
--------------------------------------------------------------------------------
/data/datatypes/AU/persons/au_medicare.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - AU
6 | doc: Medicare number is a unique identifier issued by the Australian Government that enables
7 | the cardholder to receive rebates of medical expenses under the Australian Medicare
8 | system.
9 | id: aumedicarenum
10 | is_pii: 'True'
11 | langs:
12 | - en
13 | links:
14 | - type: wikipedia
15 | url: https://en.wikipedia.org/wiki/Medicare_card_(Australia)
16 | regexp: '[2-6]\d{3}\s\d{5}\s\d'
17 | name: Australian medicare number
18 | classification: identifier
19 | examples: []
20 | wikidata_property: ''
21 | translations: {}
22 |
--------------------------------------------------------------------------------
/data/datatypes/DE/medical/opscode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - medical
3 | id: opscode
4 | is_pii: 'False'
5 | langs:
6 | - de
7 | name: OPS-Prozedurcode
8 | doc: DE datatype for 'OPS-Prozedurcode anhand Feldnamen' (opscode) from rules in context
9 | 'medical'.
10 | classification: identifier
11 | country:
12 | - DE
13 | links: []
14 | examples:
15 | - value: '1234'
16 | description: OPS procedure code with 4 digits without fractional part
17 | - value: '1234.5'
18 | description: OPS procedure code with 4 digits and a 1-digit fractional part
19 | wikidata_property: ''
20 | translations: {}
21 | regexp: ^[0-9]{4}(\.[0-9]{1,2})?$
22 |
--------------------------------------------------------------------------------
/data/datatypes/ES/persons/es_nie_number.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - ES
6 | doc: The NIE is a tax identification number in Spain, known in Spanish as the NIE,
7 | or more formally the Numero de identidad de extranjero
8 | id: esnie
9 | is_pii: 'True'
10 | langs:
11 | - es
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/NIE_number
15 | regexp: ^(X(-|\.)?0?\d{7}(-|\.)?[A-Z]|[A-Z](-|\.)?\d{7}(-|\.)?[0-9A-Z]|\d{8}(-|\.)?[A-Z])$
16 | name: Spanish Foreigner Identity Number (NIE)
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/EU/geo/eu_nuts.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - EU
5 | doc: European Union geographic code, identifier for a region per NUTS
6 | id: eunuts
7 | is_pii: 'False'
8 | links:
9 | - type: wikidata
10 | url: https://www.wikidata.org/wiki/Property:P605
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/Nomenclature_of_Territorial_Units_for_Statistics
13 | langs:
14 | - en
15 | name: NUTS code (EU)
16 | wikidata_property: P605
17 | regexp: '[A-Z]{2}[A-Z0-9]{0,3}'
18 | examples:
19 | - value: BE10
20 | description: Brussels-Capital Region
21 | classification: categorical
22 | translations: {}
23 |
--------------------------------------------------------------------------------
/data/datatypes/GB/persons/uk_driver_license.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - GB
6 | doc: In the United Kingdom, a driving licence is the official document which authorises
7 | its holder to operate motor vehicles on highways and other public roads.
8 | id: ukdriverlic
9 | is_pii: 'True'
10 | langs:
11 | - en
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/Driving_licence_in_the_United_Kingdom
15 | regexp: ^[A-Z9]{5}\d{6}[A-Z9]{2}\d[A-Z]{2}$
16 | name: UK driver license number
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/US/geo/us_fips52.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - US
5 | doc: Two-digit identifier for US states and other associated areas per the former Federal
6 | Information Processing Standard (FIPS) 5-2
7 | id: fips52
8 | is_pii: 'False'
9 | links:
10 | - type: wikidata
11 | url: https://www.wikidata.org/wiki/Property:P5087
12 | langs:
13 | - en
14 | name: FIPS 5-2 numeric code (US states)
15 | wikidata_property: P5087
16 | regexp: \d{2}
17 | examples:
18 | - value: '76'
19 | description: Navassa Island
20 | - value: '70'
21 | description: Palau
22 | classification: categorical
23 | translations: {}
24 |
--------------------------------------------------------------------------------
/data/datatypes/any/software/ssdeep.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - software
3 | doc: Context triggered piecewise hash (CTPH), also known as a fuzzy hash, of the file content.
4 | id: ssdeep
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: other
10 | url: https://ssdeep-project.github.io/ssdeep/index.html
11 | - type: other
12 | url: https://developers.virustotal.com/reference/ssdeep
13 | name: SSDEEP hash
14 | examples:
15 | - value: 768:uPC0xySqWNPwcKnReqpxORBoWNOMFN5cYsFx1gAmOURksWrk/VwLtkKavNi3IJzU:uPC0xyowcklqHw9xGkLrNLtBiNR
16 | description: Virus total file Twain_32.dll
17 | classification: identifier
18 | wikidata_property: ''
19 | translations: {}
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/EU/transport/eu_cin.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - transport
3 | country:
4 | - EU
5 | doc: The Craft Identification Number (CIN) or Hull Identification Number (HIN), standardised
6 | as EN ISO 10087:2006, is a permanent unique fourteen-digit alphanumeric identifier
7 | issued to all marine vessels in Europe.
8 | id: eucin
9 | is_pii: 'False'
10 | langs:
11 | - common
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/Craft_Identification_Number
15 | name: Craft Identification Number
16 | classification: identifier
17 | examples: []
18 | wikidata_property: ''
19 | translations: {}
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/RU/geo/ru_region.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - RU
5 | doc: Code or name of the region (federal subject) in Russia as listed in the country's Constitution
6 | id: rusregion
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Russian region (federal subject) code or name
11 | patterns:
12 | - rusregionname
13 | - rusregioncode
14 | translations:
15 | ru:
16 | doc: Код или наименование субъекта Российской Федерации по Конституции страны
17 | name: Код или наименование субъекта Российской Федерации
18 | classification: categorical
19 | links: []
20 | examples: []
21 | wikidata_property: ''
22 | regexp: ''
23 |
--------------------------------------------------------------------------------
/data/datatypes/RU/government/ru_npakind.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | country:
4 | - RU
5 | doc: 'Type of the Russian legal document: order, executive order and so on'
6 | id: runpakind
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Russian regulatory legal act type
11 | parent:
12 | type: legislationtype
13 | translations:
14 | ru:
15 | doc: 'Тип нормативного документа, например: указ, постановление, приказ и др.,
16 | на русском языке'
17 | name: Тип нормативного документа в России/на русском языке
18 | classification: categorical
19 | links: []
20 | examples: []
21 | wikidata_property: ''
22 | regexp: ''
23 |
--------------------------------------------------------------------------------
/data/datatypes/any/companies/orgname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - companies
3 | doc: Name of the organization/business/company in any language
4 | id: orgname
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | name: Company or organization name
9 | parent:
10 | type: name
11 | links:
12 | - type: schema.org
13 | url: https://schema.org/legalName
14 | patterns:
15 | - encompanyname
16 | - rucompanyname
17 | translations:
18 | ru:
19 | doc: Название организации, бизнеса или компании на любом языке
20 | name: Название организации или компании
21 | classification: identifier
22 | examples: []
23 | wikidata_property: ''
24 | regexp: ''
25 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/duration.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: The amount of time elapsed between two events
4 | id: duration
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: wikipedia
10 | url: https://en.wikipedia.org/wiki/ISO_8601#Durations
11 | - type: schema.org
12 | url: https://schema.org/duration
13 | - type: other
14 | url: https://ddialliance.org/Specification/DDI-CV/DateType_1.1.html
15 | name: Time duration
16 | translations:
17 | ru:
18 | doc: Длительность события в музыке, мероприятии и тд.
19 | name: Длительность
20 | examples: []
21 | wikidata_property: ''
22 | regexp: ''
23 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/timespan.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: 'The TimeSpan structure represents a length of time (duration of time or elapsed
4 | time), and may be expressed as start/end, start/duration, or duration/end. Start,
5 | end, and duration are documented using the designated DateTime structures. Examples:
6 | start/end 2018-02-22T13:0'
7 | id: timespan
8 | is_pii: 'False'
9 | links:
10 | - type: other
11 | url: https://ddialliance.org/Specification/DDI-CV/DateType_1.1.html
12 | langs:
13 | - common
14 | name: Timespan
15 | examples: []
16 | wikidata_property: ''
17 | translations: {}
18 | regexp: ''
19 |
--------------------------------------------------------------------------------
/data/datatypes/DE/persons/de_personalausweis.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - DE
6 | doc: 'The German Identity Card is issued to German citizens by local registration
7 | offices in Germany and diplomatic missions abroad, while they are produced at the
8 | Bundesdruckerei in Berlin. '
9 | id: depersonalausweis
10 | is_pii: 'True'
11 | langs:
12 | - de
13 | links:
14 | - type: wikipedia
15 | url: https://en.wikipedia.org/wiki/German_identity_card
16 | regexp: ^[0-9]{12}$
17 | name: Germany national identity card
18 | classification: identifier
19 | examples: []
20 | wikidata_property: ''
21 | translations: {}
22 |
--------------------------------------------------------------------------------
/data/datatypes/RU/government/ru_kosgucode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | country:
4 | - RU
5 | doc: Russian Public Administration Sector Classification Code (KOSGU) used in Russian
6 | budget planning and procurement
7 | id: kosgucode
8 | is_pii: 'False'
9 | langs:
10 | - ru
11 | name: Russian Public Administration Sector Classification Code (KOSGU)
12 | translations:
13 | ru:
14 | doc: Код классификации секторов государственного управления, используемый в Российской
15 | бюджетной системе
16 | name: Код КОСГУ
17 | classification: categorical
18 | links: []
19 | examples: []
20 | wikidata_property: ''
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/RU/government/ru_kvrcode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | country:
4 | - RU
5 | doc: Russian budget expense type code (KVR) used in budget planning and procurement
6 | id: kvrcode
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Russian budget expense type code (KVR)
11 | translations:
12 | ru:
13 | doc: Код вида расходов (КВР) используется как часть кода бюджетной классификации
14 | в процессах связанных с бюджетированием и госзакупками в Российской Федерации
15 | name: Код вида расходов (КВР)
16 | classification: categorical
17 | links: []
18 | examples: []
19 | wikidata_property: ''
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/RU/persons/ru_snils.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - RU
6 | doc: Russian social insurance identifier. Issued for every insured person by Pension
7 | fund of Russia
8 | id: rusnils
9 | is_pii: 'True'
10 | langs:
11 | - ru
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/SNILS_(Russia)
15 | name: Individual insurance account number (SNILS)
16 | translations:
17 | ru:
18 | doc: Страховой номер индивидуального лицевого счёта физического лица (СНИЛС)
19 | name: Код СНИЛС
20 | classification: identifier
21 | examples: []
22 | wikidata_property: ''
23 | regexp: ''
24 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/iso8601duration.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: The amount of intervening time in a time interval, represented by the
4 | format P[n]Y[n]M[n]DT[n]H[n]M[n]S or P[n]W
5 | id: iso8601duration
6 | langs:
7 | - common
8 | name: Duration
9 | semantic_type: duration
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/ISO_8601#Durations
13 | examples:
14 | - value: P3Y6M4DT12H30M5S
15 | description: Three years, six months, four days, twelve hours, thirty minutes, and
16 | five seconds
17 | wikidata_property: ''
18 | translations: {}
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/any/identifiers/uuid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - identifiers
3 | doc: A universally unique identifier (UUID) is a 128-bit label used for information
4 | in computer systems. The term globally unique identifier (GUID) is also used.
5 | id: uuid
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: wikipedia
11 | url: https://en.wikipedia.org/wiki/Universally_unique_identifier
12 | name: A universally unique identifier (UUID)
13 | regexp: '[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{12}'
14 | classification: identifier
15 | examples: []
16 | wikidata_property: ''
17 | translations: {}
18 |
--------------------------------------------------------------------------------
/data/datatypes/RU/government/ru_budgetcode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | country:
4 | - RU
5 | doc: Budget codes issued by Ministry of Finances and Federal Treasury of Russia to
6 | all government and local budgets.
7 | id: budgetcode
8 | is_pii: 'False'
9 | langs:
10 | - ru
11 | name: Russian government budget code
12 | translations:
13 | ru:
14 | doc: 'Уникальный код, присваиваемый каждому отдельному бюджету: федеральному,
15 | субъекта федерации и муниципалитету в Российской Федерации.'
16 | name: Код бюджета
17 | classification: categorical
18 | links: []
19 | examples: []
20 | wikidata_property: ''
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/RU/government/ru_tofkcode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | country:
4 | - RU
5 | doc: Unique id of the Russian Federal treasury division.
6 | id: tofkcode
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Russian Federal treasury division code
11 | translations:
12 | ru:
13 | doc: Код территориального управления Федерального казначейства РФ. Синхронизовано
14 | с наименованием территориального управления, ведомственный справочник.
15 | name: Код территориального управления Федерального казначейства РФ
16 | classification: categorical
17 | links: []
18 | examples: []
19 | wikidata_property: ''
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/common/genre.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - common
3 | doc: Genre of the creative work, broadcast channel or group.
4 | id: genre
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | name: Genre
9 | links:
10 | - type: schema.org
11 | url: https://schema.org/genre
12 | - type: wikidata
13 | url: https://www.wikidata.org/wiki/Property:P136
14 | wikidata_property: P136
15 | translations:
16 | ru:
17 | doc: Жанр картины, музыки, фильма, документа или иного произведения искусства
18 | или иного созданного
19 | name: Жанр
20 | classification: categorical
21 | parent:
22 | type: category
23 | examples: []
24 | regexp: ''
25 |
--------------------------------------------------------------------------------
/data/datatypes/any/identifiers/dcid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - identifiers
3 | doc: Every entity in Data Commons (DC) has a unique identifier, called ‘dcid’. So,
4 | for example, the dcid of California is ‘geoId/06’ and of India is ‘country/IND’.
5 | id: dcid
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: other
11 | url: https://docs.datacommons.org/bigquery/unique_identifiers.html
12 | - type: wikidata
13 | url: https://www.wikidata.org/wiki/Property:P10730
14 | name: Datacommons unique id
15 | regexp: '[A-Za-z\d_/]+'
16 | wikidata_property: P10730
17 | classification: identifier
18 | examples: []
19 | translations: {}
20 |
--------------------------------------------------------------------------------
/_original/categories.csv:
--------------------------------------------------------------------------------
1 | common Common
2 | pii Personally identifiable information
3 | geo Geographic identifiers
4 | medical Medical/pharma identifiers
5 | finances Finances and banking
6 | datetime Date and time
7 | government Government related
8 | science Scientific
9 | companies Companies/business/organizations
10 | internet Internet
11 | identifiers Universal unique identifiers
12 | objectids Objects identifiers
13 | cryptography Cryptographic identifiers
14 | persons Person related
15 | cryptocurrency Cryptocurrency
16 | transport Transportation
17 | values All measurable values (percentage, amount, etc.)
18 | chemistry Chemistry
--------------------------------------------------------------------------------
/data/datatypes/GB/persons/uk_nhs_number.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | - medical
5 | country:
6 | - GB
7 | doc: NHS numbers are the unique numbers allocated to registered users of the three
8 | public health services in England, Wales and the Isle of Man; the three health systems
9 | use a shared numbering scheme.
10 | id: uknhsnum
11 | is_pii: 'True'
12 | langs:
13 | - en
14 | links:
15 | - type: wikipedia
16 | url: https://en.wikipedia.org/wiki/NHS_number
17 | regexp: ([0-9]{3})[- ]?([0-9]{3})[- ]?([0-9]{4})
18 | name: UK NHS Number
19 | classification: identifier
20 | examples: []
21 | wikidata_property: ''
22 | translations: {}
23 |
--------------------------------------------------------------------------------
/data/datatypes/RU/government/ru_tofkname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | country:
4 | - RU
5 | doc: Name of the Russian Federal treasury division
6 | id: tofkname
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Russian Federal treasury division name
11 | translations:
12 | ru:
13 | doc: Наименование территориального управления Федерального казначейства РФ. Синхронизовано
14 | с кодом территориального управления, ведомственный справочник.
15 | name: Наименование территориального управления Федерального казначейства РФ
16 | classification: categorical
17 | links: []
18 | examples: []
19 | wikidata_property: ''
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/US/finances/us_aba_routing.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | doc: In the United States, an ABA routing transit number (ABA RTN) is a nine-digit
4 | code printed on the bottom of checks to identify the financial institution on which
5 | it was drawn.
6 | id: abaroutingnum
7 | is_pii: 'False'
8 | langs:
9 | - en
10 | country:
11 | - US
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/ABA_routing_transit_number
15 | regexp: '[0123678]\d{3}-\d{4}-\d'
16 | name: American Banking Association (ABA) routing number
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/CA/persons/ca_bc_ptn.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - CA
6 | doc: Each B.C. resident enrolled with the Medical Services Plan (MSP) is given a unique
7 | lifetime identifier for health care called a Personal Health Number (PHN).
8 | id: cabcphn
9 | is_pii: 'True'
10 | langs:
11 | - en
12 | links:
13 | - type: other
14 | url: https://www2.gov.bc.ca/gov/content/health/health-drug-coverage/msp/bc-residents/personal-health-identification
15 | name: Canada British Columbia's Personal Health Number (PHN)
16 | classification: identifier
17 | examples: []
18 | wikidata_property: ''
19 | translations: {}
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/RU/government/ru_ppocode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | country:
4 | - RU
5 | doc: Russian public legal formation is a legal definition of public entity with right
6 | of administration of its level budget
7 | id: ppocode
8 | is_pii: 'False'
9 | langs:
10 | - ru
11 | name: Russian public legal formation code
12 | translations:
13 | ru:
14 | doc: Публично правовое образование - это организация/субъект федерации с правом
15 | распоряжения бюджетом своего уровня
16 | name: Код публично правового образования в РФ
17 | classification: categorical
18 | links: []
19 | examples: []
20 | wikidata_property: ''
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/any/internet/ipv4subnet.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - internet
3 | doc: Range of IPv4 addresses
4 | id: ipv4subnet
5 | is_pii: 'True'
6 | langs:
7 | - common
8 | links:
9 | - type: wikidata
10 | url: https://www.wikidata.org/wiki/Property:P3761
11 | name: IPv4 routing prefix (subnet)
12 | wikidata_property: P3761
13 | regexp: (([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]?|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]?|25[0-5])\/([0-9]|[12][0-9]|3[0-2])
14 | examples:
15 | - value: 163.1.0.0/16
16 | description: University of Oxford
17 | - value: 193.166.190.0/24
18 | description: Helsinki University Central Hospital
19 | translations: {}
20 |
--------------------------------------------------------------------------------
/data/datatypes/RU/government/ru_kvrname.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | country:
4 | - RU
5 | doc: Russian budget expense type code (KVR) used in budget planning and procurement
6 | id: kvrname
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | name: Russian budget expense type code name (KVR)
11 | translations:
12 | ru:
13 | doc: Код вида расходов (КВР) используется как часть кода бюджетной классификации
14 | в процессах связанных с бюджетированием и госзакупками в Российской Федерации
15 | name: Наименование кода вида расходов (КВР)
16 | classification: categorical
17 | links: []
18 | examples: []
19 | wikidata_property: ''
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/geo/longitude.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | - common
4 | doc: Longitude is a geographic coordinate that specifies the east–west position of
5 | a point on the Earth's surface, or the surface of a celestial body. It is an angular
6 | measurement, usually expressed in degrees and denoted by the Greek letter lambda
7 | (λ)
8 | id: longitude
9 | is_pii: 'False'
10 | langs:
11 | - common
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/Longitude
15 | - type: schema.org
16 | url: http://schema.org/longitude
17 | name: Longitude
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/RU/government/ru_pponame.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | country:
4 | - RU
5 | doc: Russian public legal formation is a legal definition of public entity with right
6 | of administration of its level budget
7 | id: pponame
8 | is_pii: 'False'
9 | langs:
10 | - ru
11 | name: Russian public legal formation name
12 | translations:
13 | ru:
14 | doc: Публично правовое образование - это организация/субъект федерации с правом
15 | распоряжения бюджетом своего уровня
16 | name: Наименование публично правового образования в РФ
17 | classification: categorical
18 | links: []
19 | examples: []
20 | wikidata_property: ''
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/US/persons/us_ptin.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - US
6 | doc: 'The Preparer Tax Identification Number (PTIN) is an identification number that
7 | all paid tax return preparers must use on U.S. federal tax returns or claims for
8 | refund submitted to the Internal Revenue Service (IRS). '
9 | id: usptin
10 | is_pii: 'True'
11 | langs:
12 | - en
13 | links:
14 | - type: other
15 | url: https://en.wikipedia.org/wiki/Preparer_Tax_Identification_Number
16 | name: US PTIN (Preparer Tax Identification Number)
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/any/files/filename.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - common
3 | doc: File name with common notation like somefile.ext
4 | id: filename
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: wikipedia
10 | url: https://en.wikipedia.org/wiki/Filename
11 | - type: datadrivendiscovery
12 | url: https://metadata.datadrivendiscovery.org/types/FileName
13 | name: Name of the file
14 | examples:
15 | - value: sheet.xls
16 | description: sheet.xls
17 | - value: dataset.xml
18 | description: dataset.xml
19 | translations:
20 | ru:
21 | doc: Название файла в файловой системе
22 | name: Название файла
23 | wikidata_property: ''
24 | regexp: ''
25 |
--------------------------------------------------------------------------------
/data/datatypes/any/geo/latitude.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | - common
4 | doc: In geography, latitude is a geographic coordinate that specifies the north–south
5 | position of a point on the Earth's surface. Latitude is an angle (defined below)
6 | which ranges from 0° at the Equator to 90° (North or South) at the poles.
7 | id: latitude
8 | is_pii: 'False'
9 | langs:
10 | - common
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/Latitude
14 | - type: schema.org
15 | url: http://schema.org/latitude
16 | name: Latitude
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/any/science/doiprefix.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - science
3 | - identifiers
4 | doc: Identifier specific to a DOI registrant
5 | id: doiprefix
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: wikipedia
11 | url: https://en.wikipedia.org/wiki/Digital_object_identifier
12 | - type: wikidata
13 | url: https://www.wikidata.org/wiki/Property:P1662
14 | wikidata_property: P1662
15 | name: Digital Object Identifier (DOI) prefix
16 | translations:
17 | ru:
18 | doc: Уникальный идентификатор для регистратора идентификаторов цифровых объектов
19 | name: Префикс DOI
20 | classification: identifier
21 | examples: []
22 | regexp: ''
23 |
--------------------------------------------------------------------------------
/data/datatypes/any/geo/address.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - common
3 | - pii
4 | - geo
5 | doc: Geographic address, commonly consisting of postindex, country, area, town, street
6 | and building
7 | id: address
8 | is_pii: 'True'
9 | langs:
10 | - common
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/Address
14 | name: Address (physical location)
15 | translations:
16 | ru:
17 | doc: Географический или административный адрес местонахождения. Может включать
18 | почтовый индекс, название страны, региона, города, улицы и здания.
19 | name: Адрес (физическое местонахождение)
20 | examples: []
21 | wikidata_property: ''
22 | regexp: ''
23 |
--------------------------------------------------------------------------------
/data/datatypes/CA/companies/cacorp.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - companies
3 | id: cacorp
4 | is_pii: 'False'
5 | langs:
6 | - en
7 | name: Federal corporation number
8 | doc: CA datatype for 'Federal corporation number' (cacorp) from rules
9 | in context 'companies'.
10 | classification: identifier
11 | country:
12 | - CA
13 | links: []
14 | examples:
15 | - value: '1234567'
16 | description: Federal corporation number with 7 digits
17 | - value: '12345678'
18 | description: Federal corporation number with 8 digits
19 | - value: '123456789'
20 | description: Federal corporation number with 9 digits
21 | wikidata_property: ''
22 | translations: {}
23 | regexp: ^[0-9]{7,9}$
24 |
--------------------------------------------------------------------------------
/data/datatypes/CA/finances/cacharity.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | id: cacharity
4 | is_pii: 'False'
5 | langs:
6 | - en
7 | name: Charity registration number
8 | doc: CA datatype for 'Charity registration number' (cacharity) from
9 | rules in context 'finances'.
10 | classification: identifier
11 | country:
12 | - CA
13 | links: []
14 | examples:
15 | - value: '123456789RR0001'
16 | description: Charity registration number with 9-digit BN, 'RR' program identifier and 4-digit reference
17 | - value: '987654321RR0002'
18 | description: Another valid charity registration number format
19 | wikidata_property: ''
20 | translations: {}
21 | regexp: ^[0-9]{9}RR[0-9]{4}$
22 |
--------------------------------------------------------------------------------
/data/datatypes/RU/geo/ru_kadastr.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - RU
5 | doc: Russian land registry number (kadastroviy nomer, cadaster number) assigned to
6 | each land, territory, building and other geographic administrative objects by Rosreestr
7 | id: rukadastr
8 | is_pii: 'False'
9 | langs:
10 | - ru
11 | name: Russian land registry number
12 | translations:
13 | ru:
14 | doc: Кадастровый номер — уникальный номер объекта недвижимости, присваиваемый
15 | ему при осуществлении кадастрового и технического учёта.
16 | name: Кадастровый номер
17 | classification: identifier
18 | links: []
19 | examples: []
20 | wikidata_property: ''
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/US/geo/us_zipcode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - US
5 | doc: A ZIP Code is a postal code used by the United States Postal Service (USPS).
6 | Introduced in 1963, the basic format consisted of five digits. In 1983, an extended
7 | ZIP+4 code was introduced; it included the five digits of the ZIP Code, followed
8 | by a hyphen and four digits that designated a more specific location.
9 | id: uszipcode
10 | langs:
11 | - en
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/ZIP_Code
15 | name: US Postal code
16 | semantic_type: postindex
17 | examples: []
18 | wikidata_property: ''
19 | translations: {}
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/government/legislationtype.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | doc: The type of the legislation. Examples of values are "law", "act", "directive",
4 | "decree", "regulation", "statutory instrument", "loi organique", "reglement grand-ducal",
5 | etc., depending on the country.
6 | id: legislationtype
7 | is_pii: 'False'
8 | langs:
9 | - common
10 | links:
11 | - type: schema.org
12 | url: https://schema.org/legislationType
13 | name: Legislation type
14 | parent:
15 | type: genre
16 | translations:
17 | ru:
18 | doc: Тип нормативного документа
19 | name: Тип нормативного документа
20 | examples: []
21 | wikidata_property: ''
22 | regexp: ''
23 |
--------------------------------------------------------------------------------
/data/datatypes/any/cryptography/crc32.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - cryptography
3 | doc: A cyclic redundancy check (CRC) is an error-detecting code commonly used in digital
4 | networks and storage devices to detect accidental changes to digital data. Blocks
5 | of data entering these systems get a short check value attached, based on the remainder
6 | of a polynomial division of their contents.
7 | id: crc32
8 | is_pii: 'False'
9 | langs:
10 | - common
11 | parent:
12 | type: numeric
13 | links:
14 | - type: wikipedia
15 | url: https://en.wikipedia.org/wiki/Cyclic_redundancy_check
16 | name: CRC-32
17 | examples: []
18 | wikidata_property: ''
19 | translations: {}
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/objectids/openlibraryid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - objectids
3 | doc: Identifier for a work ("W"), edition ("M") or author ("A") for book data of the
4 | Internet Archive
5 | id: openlibid
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: wikipedia
11 | url: https://en.wikipedia.org/wiki/Open_Library
12 | - type: wikidata
13 | url: https://www.wikidata.org/wiki/Property:P648
14 | name: Open Library ID
15 | regexp: OL[1-9]\d*[AMW]
16 | wikidata_property: P648
17 | examples:
18 | - value: OL36858W
19 | description: Twenty Years After
20 | - value: OL3156833A
21 | description: Anton Chekhov
22 | classification: identifier
23 | translations: {}
24 |
--------------------------------------------------------------------------------
/data/datatypes/any/software/richpeheader.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - software
3 | doc: The Rich header is an undocumented header contained within PE files compiled
4 | and linked using the Microsoft toolchain. It contains information about the build
5 | environment that the PE file was created in.
6 | id: richpeheaderhash
7 | is_pii: 'False'
8 | langs:
9 | - common
10 | links:
11 | - type: other
12 | url: https://github.com/RichHeaderResearch/RichPE
13 | name: Rich PE Header hash
14 | examples:
15 | - value: 6fef15a59affb0e30563fc8baf21ed67
16 | description: Virus total file Twain_32.dll
17 | classification: identifier
18 | wikidata_property: ''
19 | translations: {}
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/DE/finances/handelsregisternr.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | id: handelsregisternr
4 | is_pii: 'False'
5 | langs:
6 | - de
7 | name: Handelsregisternummer anhand Feldname
8 | doc: DE datatype for 'Handelsregisternummer anhand Feldname' (handelsregisternr) from
9 | rules in context 'finances'.
10 | classification: identifier
11 | country:
12 | - DE
13 | links: []
14 | examples:
15 | - value: 'H12345'
16 | description: Handelsregisternummer with single-letter prefix and 5-digit number
17 | - value: 'A1'
18 | description: Handelsregisternummer with single-letter prefix and 1-digit number
19 | wikidata_property: ''
20 | translations: {}
21 | regexp: '^[A-Z][0-9]{1,6}$'
22 |
--------------------------------------------------------------------------------
/data/datatypes/any/geo/postindex.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | - pii
4 | - common
5 | doc: Postal index/code, unique post identifier used in address
6 | id: postindex
7 | is_pii: 'True'
8 | langs:
9 | - common
10 | links:
11 | - type: wikidata
12 | url: https://www.wikidata.org/wiki/Property:P281
13 | - type: schema.org
14 | url: https://schema.org/postalCode
15 | wikidata_property: P281
16 | name: Postal code/index
17 | patterns:
18 | - ukpostalcode
19 | - uszipcode
20 | - ruspostalcode
21 | - frpostcode
22 | - espostcode
23 | translations:
24 | ru:
25 | doc: Индекс почтовой связи
26 | name: Почтовый индекс
27 | classification: categorical
28 | examples: []
29 | regexp: ''
30 |
--------------------------------------------------------------------------------
/data/datatypes/any/shipping/unpackaginggroup.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - transport
3 | - chemistry
4 | doc: Packaging code according to UN transportation rules
5 | id: unpkggroup
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: wikidata
11 | url: https://www.wikidata.org/wiki/Property:P876
12 | name: UN Packaging group
13 | wikidata_property: P876
14 | regexp: '(I\/II)|(II\/III)|I{1,3}'
15 | examples:
16 | - value: 'I'
17 | description: sodium azide
18 | - value: 'II'
19 | description: hydrofluoric acid
20 | translations:
21 | ru:
22 | doc: 'Код группы упаковки ООН'
23 | name: Код группы упаковки ООН
24 | classification: categorical
--------------------------------------------------------------------------------
/data/datatypes/any/telecom/msisdn.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - telecom
3 | - pii
4 | doc: MSISDN is a number uniquely identifying a subscription in a Global System for
5 | Mobile communications or a Universal Mobile Telecommunications System mobile network.
6 | It is the mapping of the telephone number to the subscriber identity module in a
7 | mobile or cellular phone.
8 | id: msisdn
9 | is_pii: 'True'
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/MSISDN
13 | regexp: ^[1-9][0-9]{10,14}$
14 | langs:
15 | - common
16 | name: Mobile Subscriber ISDN Number (MSISDN)
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 |
--------------------------------------------------------------------------------
/data/datatypes/AR/persons/ar_dni.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - AR
6 | doc: Documento Nacional de Identidad or DNI (which means National Identity Document)
7 | is the main identity document for Argentine citizens, as well as temporary or permanent
8 | resident aliens (DNI Extranjero).
9 | id: ardni
10 | is_pii: 'True'
11 | langs:
12 | - es
13 | links:
14 | - type: wikipedia
15 | url: https://en.wikipedia.org/wiki/Documento_Nacional_de_Identidad_(Argentina)
16 | regexp: '[0-9]{2}\.?[0-9]{3}\.?[0-9]{3}'
17 | name: Argentina DNI (National Identity Document)
18 | classification: identifier
19 | examples: []
20 | wikidata_property: ''
21 | translations: {}
22 |
--------------------------------------------------------------------------------
/data/datatypes/US/persons/us_itin.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - US
6 | doc: A Taxpayer Identification Number (TIN) is an identifying number used for tax
7 | purposes in the United States and in other countries under the Common Reporting
8 | Standard.
9 | id: usitin
10 | is_pii: 'True'
11 | langs:
12 | - en
13 | links:
14 | - type: wikipedia
15 | url: https://en.wikipedia.org/wiki/Taxpayer_Identification_Number
16 | regexp: (9\d{2})[- ]{1}((7[0-9]{1}|8[0-8]{1})|(9[0-2]{1})|(9[4-9]{1}))[- ]{1}(\d{4})
17 | name: US ITIN (Individual Taxpayer Identification Number)
18 | classification: identifier
19 | examples: []
20 | wikidata_property: ''
21 | translations: {}
22 |
--------------------------------------------------------------------------------
/data/datatypes/US/persons/us_ssn.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - US
6 | doc: In the United States, a Social Security number (SSN) is a nine-digit number issued
7 | to U.S. citizens, permanent residents, and temporary (working) residents under section
8 | 205(c)(2) of the Social Security Act, codified as 42 U.S.C. § 405(c)(2)
9 | id: usssn
10 | is_pii: 'True'
11 | langs:
12 | - en
13 | links:
14 | - type: wikipedia
15 | url: https://en.wikipedia.org/wiki/Social_Security_number
16 | regexp: ([0-9]{3})[- .]([0-9]{2})[- .]([0-9]{4})
17 | name: US Social Security Number
18 | classification: identifier
19 | examples: []
20 | wikidata_property: ''
21 | translations: {}
22 |
--------------------------------------------------------------------------------
/data/datatypes/any/objectids/viafid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - objectids
3 | doc: 'Identifier for the Virtual International Authority File database [format: up
4 | to 22 digits]'
5 | id: viafid
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: wikipedia
11 | url: https://en.wikipedia.org/wiki/Virtual_International_Authority_File
12 | - type: wikidata
13 | url: https://www.wikidata.org/wiki/Property:P214
14 | name: VIAF ID
15 | examples:
16 | - value: '44298806'
17 | description: Foncine, Jean-Louis
18 | - value: '125715126'
19 | description: Cairo
20 | regexp: ([1-9]\d(?:\d{0,7}|\d{17,20}))
21 | wikidata_property: P214
22 | classification: identifier
23 | translations: {}
24 |
--------------------------------------------------------------------------------
/data/datatypes/RU/geo/ru_kladr.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - RU
5 | doc: Russian unique codes assigned by Government Tax Service for every administrative
6 | address in the Russian Federation. Replaced by the FIAS government system
7 | id: kladr
8 | is_pii: 'False'
9 | langs:
10 | - ru
11 | name: KLADR code (Russian)
12 | translations:
13 | ru:
14 | doc: КЛАДР — ведомственный классификатор ФНС России, созданный для распределения
15 | территорий между налоговыми инспекциями и автоматизированной рассылки корреспонденции.
16 | Заменен системой ФИАС
17 | name: Код КЛАДР
18 | classification: identifier
19 | links: []
20 | examples: []
21 | wikidata_property: ''
22 | regexp: ''
23 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/unixtime.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - datetime
3 | doc: Digital timestamp defined as Unix Time. It is a system for describing a point
4 | in time. It is the number of seconds that have elapsed since the Unix epoch, excluding
5 | leap seconds.
6 | id: unixtime
7 | is_pii: 'False'
8 | langs:
9 | - common
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/Unix_time
13 | name: Digital timestamp (Unix time, Epoch time, Posix time)
14 | regexp: \d{1,10}
15 | examples:
16 | - value: '1652190194'
17 | description: '2022-05-10T13:43:14+00:00'
18 | - value: '915148800'
19 | description: '1999-01-01T00:00:00+00:00'
20 | wikidata_property: ''
21 | translations: {}
22 |
--------------------------------------------------------------------------------
/data/datatypes/any/transport/railway/uiccode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - transport
3 | doc: Identifier for a railway station in Europe, CIS countries, the Far East (China,
4 | Mongolia, Japan, Korea, Vietnam), North Africa and the Middle East
5 | id: uiccode
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: wikidata
11 | url: https://www.wikidata.org/wiki/Property:P722
12 | name: UIC station code
13 | wikidata_property: P722
14 | regexp: (10|2\d|3[0-3]|4[01249]|5\d|6[0125678]|7\d|8[0-8]|9\d).+
15 | examples:
16 | - value: '8748100'
17 | description: Nantes Station
18 | - value: '7120100'
19 | description: Train station of Astorga
20 | classification: identifier
21 | translations: {}
22 |
--------------------------------------------------------------------------------
/data/datatypes/CA/companies/cabizlic.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - companies
3 | id: cabizlic
4 | is_pii: 'False'
5 | langs:
6 | - en
7 | name: Business licence number
8 | doc: CA datatype for 'Business licence number' (cabizlic) from rules
9 | in context 'companies'.
10 | classification: identifier
11 | country:
12 | - CA
13 | links: []
14 | examples:
15 | - value: A1234
16 | description: Business licence number with 1 letter and 4 digits
17 | - value: BC12345
18 | description: Business licence number with 2 letters and 5 digits
19 | - value: XYZ1234567
20 | description: Business licence number with 3 letters and 7 digits
21 | wikidata_property: ''
22 | translations: {}
23 | regexp: ^[A-Z]{1,3}[0-9]{4,7}$
24 |
--------------------------------------------------------------------------------
/data/datatypes/RU/companies/ru_okpo.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - companies
3 | country:
4 | - RU
5 | doc: Russian organizations statistical code (OKPO) issued by Rosstat
6 | id: okpo
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | links:
11 | - type: wikidata
12 | url: https://www.wikidata.org/wiki/Property:P2391
13 | name: Russian organization stat code
14 | wikidata_property: P2391
15 | regexp: '\d{8}'
16 | examples:
17 | - value: '00040778'
18 | description: Gazprom
19 | - value: '03323755'
20 | description: TEK SPB
21 | translations:
22 | ru:
23 | doc: Код по Общероссийскому классификатору предприятий и организаций
24 | name: Код ОКПО
25 | classification: identifier
--------------------------------------------------------------------------------
/data/datatypes/any/industry/isicrev4.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - industry
3 | langs:
4 | - common
5 | doc: Code of industry by ISIC (International Standard Industrial Classification of
6 | All Economic Activities)
7 | id: isicrev4
8 | is_pii: 'False'
9 | links:
10 | - type: other
11 | url: https://www.oecd-ilibrary.org/content/publication/8722852c-en
12 | - type: wikidata
13 | url: https://www.wikidata.org/wiki/Property:P1796
14 | name: ISIC rev 4 class code
15 | wikidata_property: P1796
16 | regexp: ([A-U]|\d{2,4})
17 | examples:
18 | - value: '3030'
19 | description: aerospace industry
20 | - value: '3020'
21 | description: locomotive manufacturing
22 | classification: categorical
23 | translations: {}
24 |
--------------------------------------------------------------------------------
/data/datatypes/EU/transport/eu_eninumber.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - transport
3 | country:
4 | - EU
5 | doc: An ENI number (European Number of Identification or European Vessel Identification
6 | Number) is a registration for ships capable of navigating on inland European waters.
7 | It is a unique, eight-digit identifier that is attached to a hull for its entire
8 | lifetime, independent of the vessel's current name or flag.
9 | id: eninumber
10 | is_pii: 'False'
11 | langs:
12 | - common
13 | links:
14 | - type: wikipedia
15 | url: https://en.wikipedia.org/wiki/ENI_number
16 | name: ENI Number
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/RU/government/ru_fedgrbs.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | country:
4 | - RU
5 | doc: 3-digit code of the Russian direct government budget receiver and distributor.
6 | Assigned to each government ministry, agency, service and some other budget distributor
7 | organizations
8 | id: fedgrbs
9 | is_pii: 'False'
10 | langs:
11 | - ru
12 | name: Russian direct government budget receiver and distributor code
13 | translations:
14 | ru:
15 | name: Код главного распорядителя бюджетных средств
16 | doc: Код из 3 цифр определяющий конкретного распределителя бюджетных средств (ГРБС)
17 | classification: categorical
18 | links: []
19 | examples: []
20 | wikidata_property: ''
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/SE/persons/se_personnumer.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - SE
6 | doc: 'The personal identity number (Swedish: personnummer) is the Swedish national
7 | identification number. It is a ten digit number that is widely used in Sweden to
8 | identify individuals.'
9 | id: sepersonnummer
10 | is_pii: 'True'
11 | langs:
12 | - se
13 | links:
14 | - type: wikipedia
15 | url: https://en.wikipedia.org/wiki/Personal_identity_number_(Sweden)
16 | name: Sweden personal identity number (personnummer)
17 | classification: identifier
18 | examples: []
19 | wikidata_property: ''
20 | translations: {}
21 | regexp: ^(?:[0-9]{2})?[0-9]{2}(0[1-9]|1[0-2])(0[1-9]|[1-2][0-9]|3[0-1])[-+][0-9]{4}$
22 |
--------------------------------------------------------------------------------
/data/datatypes/any/transport/maritime/imonumber.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - transport
3 | doc: The International Maritime Organization (IMO) number is a unique identifier for
4 | ships, registered ship owners and management companies.
5 | id: imonumber
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: wikipedia
11 | url: https://en.wikipedia.org/wiki/IMO_number
12 | - type: wikidata
13 | url: https://www.wikidata.org/wiki/Property:P458
14 | name: IMO ship Number
15 | wikidata_property: P458
16 | regexp: ([1-9]\d{6})
17 | examples:
18 | - value: '6725418'
19 | description: Queen Elizabeth 2
20 | - value: '9224752'
21 | description: TI Asia
22 | classification: identifier
23 | translations: {}
24 |
--------------------------------------------------------------------------------
/data/datatypes/CA/finances/cabn.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | id: cabn
4 | is_pii: 'False'
5 | langs:
6 | - en
7 | name: Canada Business Number
8 | doc: CA datatype for 'Canada Business Number' (cabn) from rules in context
9 | 'finances'.
10 | classification: identifier
11 | country:
12 | - CA
13 | links:
14 | - type: doc
15 | url: https://www.canada.ca/en/revenue-agency/services/tax/businesses/topics/registering-your-business/business-number.html
16 | examples:
17 | - value: '123456789'
18 | description: Canada Business Number with 9 digits
19 | - value: '987654321'
20 | description: Another valid Canada Business Number with 9 digits
21 | wikidata_property: ''
22 | translations: {}
23 | regexp: ^[0-9]{9}$
24 |
--------------------------------------------------------------------------------
/data/datatypes/RU/companies/ru_ogrn_ogrnip.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - companies
3 | country:
4 | - RU
5 | doc: Russian primary government identification number assigned by Tax Service to any
6 | business entity (OGRN) in Russia and sole proprietors (OGRNIP)
7 | id: ogrn_ogrnip
8 | is_pii: 'False'
9 | langs:
10 | - ru
11 | name: Russian primary government identification number (OGRN and OGRNIP)
12 | translations:
13 | ru:
14 | doc: Основной государственный регистрационный номер индивидуального предпринимателя
15 | или ОГРН (основной государственный регистрационный номер)
16 | name: Код ОГРН или ОГРНИП
17 | classification: identifier
18 | links: []
19 | examples: []
20 | wikidata_property: ''
21 | regexp: ''
22 |
--------------------------------------------------------------------------------
/data/datatypes/any/finances/tickersymbol.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | doc: Ticker symbol or stock symbol is an abbreviation used to uniquely identify publicly
4 | traded shares of a particular stock on a particular stock market.
5 | id: tickersymbol
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: wikipedia
11 | url: https://en.wikipedia.org/wiki/Ticker_symbol
12 | - type: wikidata
13 | url: https://www.wikidata.org/wiki/Property:P249
14 | name: Stock ticker symbol
15 | regexp: '[A-Z0-9.]{1,16}'
16 | wikidata_property: P249
17 | examples:
18 | - value: MSFT
19 | description: Microsoft
20 | - value: AMZN
21 | description: Amazon
22 | classification: identifier
23 | translations: {}
24 |
--------------------------------------------------------------------------------
/data/datatypes/US/persons/us_ein.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - US
6 | doc: An Employer Identification Number (EIN) is also known as a Federal Tax Identification
7 | Number, and is used to identify a business entity. Generally, businesses need an
8 | EIN.
9 | id: usein
10 | is_pii: 'True'
11 | langs:
12 | - en
13 | links:
14 | - type: other
15 | url: https://www.irs.gov/businesses/small-businesses-self-employed/employer-id-numbers
16 | - type: wikipedia
17 | url: https://en.wikipedia.org/wiki/Employer_Identification_Number
18 | regexp: '[0-9]{9}'
19 | name: US EIN (Employer Identification Number)
20 | classification: identifier
21 | examples: []
22 | wikidata_property: ''
23 | translations: {}
24 |
--------------------------------------------------------------------------------
/data/datatypes/any/datetime/birthday.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - datetime
4 | doc: A birthday is the anniversary of the birth of a person, or figuratively of an
5 | institution. Birthdays of people are celebrated in numerous cultures, often with
6 | birthday gifts, birthday cards, a birthday party, or a rite of passage.
7 | id: birthday
8 | is_pii: 'True'
9 | langs:
10 | - common
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/Birthday
14 | name: Birthday
15 | parent:
16 | type: date
17 | translations:
18 | ru:
19 | doc: 'День рождения — годовщина рождения, соответствует дате рождения человека. '
20 | name: День рождения
21 | examples: []
22 | wikidata_property: ''
23 | regexp: ''
24 |
--------------------------------------------------------------------------------
/data/datatypes/any/geo/what3words.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | doc: What3words is a proprietary geocode system designed to identify any location
4 | with a resolution of about 3 metres (9.8 ft). It is owned by What3words Limited,
5 | based in London, England.
6 | id: what3words
7 | is_pii: 'False'
8 | langs:
9 | - common
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/What3words
13 | name: What3Words
14 | wikidata_property: ''
15 | regexp: '^[^\s\d.]+\.[^\s\d.]+\.[^\s\d.]+$'
16 | examples:
17 | - value: thrive.collaborating.slangy
18 | description: What3Words example 1
19 | - value: squirrels.devours.longitudes
20 | description: What3Words example 2
21 | classification: identifier
22 | translations: {}
23 |
--------------------------------------------------------------------------------
/data/datatypes/GB/geo/uk_uprn.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - GB
5 | doc: The Unique Property Reference Number (UPRN) is a unique alphanumeric identifier
6 | (a geocode) for every spatial address in Great Britain and can be found in Ordnance
7 | Survey AddressBase products.
8 | id: uprn
9 | is_pii: 'False'
10 | langs:
11 | - en
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/UPRN
15 | - type: wikidata
16 | url: https://www.wikidata.org/wiki/Property:P8399
17 | wikidata_property: P8399
18 | name: UK Unique Property Reference Number (UPRN)
19 | classification: identifier
20 | examples:
21 | - value: '9051138577'
22 | description: Marischal College
23 | regexp: '[1-9]\d{0,11}'
24 | translations: {}
25 |
--------------------------------------------------------------------------------
/data/datatypes/RU/government/ru_kbk.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | country:
4 | - RU
5 | doc: Russian budget classification code KBK (kod byudzhetnoy klassifikacii) identifies a
6 | certain budget allocation of the federal or regional budget
7 | id: kbk
8 | is_pii: 'False'
9 | langs:
10 | - ru
11 | name: Russian budget classification code (KBK)
12 | translations:
13 | ru:
14 | doc: Код бюджетной классификации (КБК) используется в бюджетной системе РФ для
15 | идентификации конкретной строки расхода федерального или регионального государственного
16 | бюджета.
17 | name: Код бюджетной классификации (КБК)
18 | classification: categorical
19 | links: []
20 | examples: []
21 | wikidata_property: ''
22 | regexp: ''
23 |
--------------------------------------------------------------------------------
/data/datatypes/any/finances/swiftcode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | doc: ISO 9362 defines a standard format of Business Identifier Codes (also known as
4 | SWIFT-BIC, BIC, SWIFT ID or SWIFT code) approved by the International Organization
5 | for Standardization (ISO).
6 | id: swiftcode
7 | is_pii: 'False'
8 | langs:
9 | - common
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/ISO_9362
13 | - type: wikidata
14 | url: https://www.wikidata.org/wiki/Property:P2627
15 | name: Bank SWIFT code (ISO 9362)
16 | regexp: '[A-Z]{6}[0-9A-Z]{2}([0-9A-Z]{3})?'
17 | wikidata_property: P2627
18 | examples:
19 | - value: DEUTDEFF
20 | description: Deutsche Bank
21 | classification: identifier
22 | translations: {}
23 |
--------------------------------------------------------------------------------
/data/datatypes/any/software/authentihash.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - software
3 | doc: authentihash is a sha256 hash used by Microsoft to verify that the relevant sections
4 | of a PE image file have not been altered. This specific type of hash is used by
5 | Microsoft AppLocker.
6 | id: authentihash
7 | is_pii: 'False'
8 | parent:
9 | type: sha256hash
10 | langs:
11 | - common
12 | links:
13 | - type: other
14 | url: https://developers.virustotal.com/reference/authentihash
15 | name: authentihash
16 | examples:
17 | - value: b9be0c1b4a48e7acd1bd186d31f86212794a824f0a9602242b054f9d98bd7c70
18 | description: Virus total file Twain_32.dll
19 | classification: identifier
20 | wikidata_property: ''
21 | translations: {}
22 | regexp: ''
23 |
--------------------------------------------------------------------------------
/data/datatypes/US/industry/us_soc.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - industry
3 | country:
4 | - US
5 | - GB
6 | doc: Standard Occupational Classification System
7 | id: ensoc
8 | is_pii: 'False'
9 | links:
10 | - type: wikipedia
11 | url: https://en.wikipedia.org/wiki/Standard_Occupational_Classification_System
12 | - type: wikidata
13 | url: https://www.wikidata.org/wiki/Property:P919
14 | langs:
15 | - en
16 | name: Standard Occupational Classification System (US and GB)
17 | wikidata_property: P919
18 | regexp: \d{2}\-\d{4}
19 | examples:
20 | - value: 15-1242
21 | description: Database Administrators
22 | - value: 21-1023
23 | description: 'Mental Health and Substance Abuse Social Workers '
24 | classification: categorical
25 | translations: {}
26 |
--------------------------------------------------------------------------------
/data/datatypes/any/finances/iban.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | doc: The International Bank Account Number (IBAN) is an internationally agreed system
4 | of identifying bank accounts across national borders to facilitate the communication
5 | and processing of cross border transactions with a reduced risk of transcription
6 | errors. An IBAN uniquely identifies the account of a customer at a financial institution.
7 | id: iban
8 | is_pii: 'False'
9 | langs:
10 | - common
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/International_Bank_Account_Number
14 | name: International Bank Account Number
15 | classification: identifier
16 | examples: []
17 | wikidata_property: ''
18 | translations: {}
19 | regexp: ''
20 |
--------------------------------------------------------------------------------
/data/datatypes/US/persons/us_atin.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - US
6 | doc: An ATIN is an Adoption Taxpayer Identification Number issued by the Internal
7 | Revenue Service as a temporary taxpayer identification number for the child in a
8 | domestic adoption where the adopting taxpayers do not have and/or are unable to
9 | obtain the child's Social Security Number (SSN).
10 | id: usatin
11 | is_pii: 'True'
12 | langs:
13 | - en
14 | links:
15 | - type: other
16 | url: https://www.irs.gov/individuals/adoption-taxpayer-identification-number
17 | regexp: '[0-9]{9}'
18 | name: US ATIN (Adoption Taxpayer Identification Number)
19 | classification: identifier
20 | examples: []
21 | wikidata_property: ''
22 | translations: {}
23 |
--------------------------------------------------------------------------------
/data/tools/pii/ibm-guardium.yaml:
--------------------------------------------------------------------------------
1 | id: ibmguardium
2 | category: pii
3 | name: IBM Security Guardium Analyzer
4 | doc: 'IBM Security Guardium Analyzer is a powerful service that can help you with this data security effort. Simple to use, Guardium Analyzer allows you to set up connections to your data sources - with no need to configure classification or risk scanning.'
5 | website: https://www.ibm.com/docs/en/sga
6 | supported_types:
7 | - email
8 | - phone
9 | - bankcard
10 | - address
11 | - person_name
12 | - person_fullname
13 | - person_surname
14 | - person_firstname
15 | - benatcardid
16 | - ipaddr
17 | - usssn
18 | - uknhsnum
19 | - esnif
20 | - aumedicarenum
21 | - autfn
22 | - casin
23 |
--------------------------------------------------------------------------------
/data/tools/pii/metadata-guardian.yaml:
--------------------------------------------------------------------------------
1 | id: metadata-guardian
2 | category: pii
3 | name: Metadata Guardian
4 | doc: Metadata Guardian is a Python package that provides an easy way to protect your data sources by searching its metadata. By searching with data rules, it will detect what you are looking to protect. Using Rust, it makes blazing fast multi-regex matching.
5 | website: https://github.com/fvaleye/metadata-guardian
6 | supported_types:
7 | - email
8 | - phone
9 | - ipv4
10 | - ipv6
11 | - ipaddr
12 | - postindex
13 | - address
14 | - person_firstname
15 | - person_surname
16 | - person_fullname
17 | - birthday
18 | - gender
19 | - nationality
20 | - username
21 | - password
22 |
23 |
--------------------------------------------------------------------------------
/data/datatypes/DK/persons/dk_cpr.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | country:
5 | - DK
6 | doc: 'The Danish Personal Identification number is a national identification number,
7 | which is part of the personal information stored in the Civil Registration System
8 | (Danish: Det Centrale Personregister, Greenlandic: Inunnik Qitiusumik Nalunaarsuiffik).'
9 | id: dkcprnum
10 | is_pii: 'True'
11 | langs:
12 | - da
13 | links:
14 | - type: wikipedia
15 | url: https://en.wikipedia.org/wiki/Personal_identification_number_(Denmark)
16 | regexp: ^(0[1-9]|[12]\d|3[01])(0[1-9]|1[0-2])\d{2}[-]?\d{4}$
17 | name: CPR-Number, Personal identification number (Denmark)
18 | classification: identifier
19 | examples: []
20 | wikidata_property: ''
21 | translations: {}
22 |
--------------------------------------------------------------------------------
/data/datatypes/US/finances/us_cusip.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | country:
4 | - US
5 | doc: A CUSIP is a nine-digit numeric (e.g. 037833100 for Apple) or nine-character
6 | alphanumeric (e.g. 38259P508 for Google) code that identifies a North American financial
7 | security for the purposes of facilitating clearing and settlement of trades
8 | id: cusip
9 | is_pii: 'False'
10 | langs:
11 | - common
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/CUSIP
15 | name: CUSIP code
16 | regexp: ^[0-9]{3}[a-zA-Z0-9]{2}[a-zA-Z0-9*@#]{3}[0-9]$
17 | examples:
18 | - value: 037833100
19 | description: Apple
20 | - value: 38259P508
21 | description: Google
22 | classification: identifier
23 | wikidata_property: ''
24 | translations: {}
25 |
--------------------------------------------------------------------------------
/data/datatypes/US/geo/us_fips64.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - US
5 | doc: Identifier for US entities (mostly counties) per "Federal Information Processing
6 | Series" (FIPS 6-4), used for counties, Puerto Rico zona urbana, Metropolitan Statistical
7 | Areas (MSA) and Combined Statistical Areas (CSA) in the United States.
8 | id: fips64
9 | is_pii: 'False'
10 | links:
11 | - type: wikidata
12 | url: https://www.wikidata.org/wiki/Property:P882
13 | langs:
14 | - en
15 | name: FIPS 6-4 numeric code (US counties)
16 | wikidata_property: P882
17 | regexp: \d{5}
18 | examples:
19 | - value: '24003'
20 | description: Anne Arundel County
21 | - value: '01001'
22 | description: Autauga County
23 | classification: categorical
24 | translations: {}
25 |
--------------------------------------------------------------------------------
/data/datatypes/any/cryptocurrency/p2shaddr.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - cryptocurrency
3 | doc: 'Pay to script hash (P2SH) transactions were standardised in BIP 16. They allow
4 | transactions to be sent to a script hash (address starting with 3) instead of a
5 | public key hash (addresses starting with 1). To spend bitcoins sent via P2SH, the
6 | recipient must provide a script matching the script hash and data which makes the
7 | script evaluate to true. '
8 | id: p2shaddr
9 | is_pii: 'False'
10 | langs:
11 | - common
12 | links:
13 | - type: other
14 | url: https://allprivatekeys.com/bitcoin-address-format
15 | name: Script hash (P2SH address)
16 | classification: identifier
17 | examples: []
18 | wikidata_property: ''
19 | translations: {}
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/transport/air/iataairlinecode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - transport
3 | doc: IATA airline designators, sometimes called IATA reservation codes, are two-character
4 | codes assigned by the International Air Transport Association (IATA) to the world's
5 | airlines.
6 | id: iataairlinecode
7 | langs:
8 | - common
9 | links:
10 | - type: wikipedia
11 | url: https://en.wikipedia.org/wiki/Airline_codes
12 | - type: wikidata
13 | url: https://www.wikidata.org/wiki/Property:P229
14 | name: IATA airline designator (airline code)
15 | semantic_type: airlinecode
16 | wikidata_property: P229
17 | regexp: '[A-Z0-9]{2}'
18 | examples:
19 | - value: MH
20 | description: Malaysia Airlines
21 | - value: QW
22 | description: Blue Wings
23 | translations: {}
24 |
--------------------------------------------------------------------------------
/data/datatypes/FR/geo/fr_inseecode.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | country:
4 | - FR
5 | doc: 'The INSEE code is a numerical indexing code used by the French National Institute
6 | for Statistics and Economic Studies (INSEE) to identify various entities, including
7 | communes and départements. '
8 | id: inseecode
9 | is_pii: 'False'
10 | langs:
11 | - fr
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/INSEE_code
15 | - type: wikidata
16 | url: https://www.wikidata.org/wiki/Property:P374
17 | wikidata_property: P374
18 | regexp: \d[0-9AB][0-9P]\d\d
19 | name: INSEE code
20 | examples:
21 | - value: '75056'
22 | description: Paris
23 | - value: '48095'
24 | description: Mende
25 | classification: identifier
26 | translations: {}
27 |
--------------------------------------------------------------------------------
/data/datatypes/any/geo/unm49.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | doc: UN M49 or the Standard Country or Area Codes for Statistical Use (Series M, No.
4 | 49) is a standard for area codes used by the United Nations for statistical purposes,
5 | developed and maintained by the United Nations Statistics Division.
6 | id: unm49
7 | is_pii: 'False'
8 | langs:
9 | - en
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/UN_M49
13 | - type: wikidata
14 | url: https://www.wikidata.org/wiki/Property:P2082
15 | name: UN M49
16 | wikidata_property: P2082
17 | regexp: '[0-9]{3}'
18 | examples:
19 | - value: '005'
20 | description: South America
21 | - value: '554'
22 | description: New Zealand
23 | classification: categorical
24 | translations: {}
25 |
--------------------------------------------------------------------------------
/data/datatypes/any/geo/city.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | - pii
4 | - common
5 | doc: A city is a large human settlement. It can be defined as a permanent and densely
6 | settled place with administratively defined boundaries whose members work primarily
7 | on non-agricultural tasks.
8 | id: city
9 | is_pii: 'True'
10 | langs:
11 | - common
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/City
15 | - type: schema.org
16 | url: https://schema.org/City
17 | name: City
18 | patterns:
19 | - ruscity
20 | - uscity
21 | translations:
22 | ru:
23 | doc: Город — крупный населённый пункт, жители которого заняты, как правило, не
24 | сельским хозяйством.
25 | name: Город
26 | examples: []
27 | wikidata_property: ''
28 | regexp: ''
29 |
--------------------------------------------------------------------------------
/data/datatypes/RU/medical/ru_medmnn.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - medical
3 | country:
4 | - RU
5 | doc: An international nonproprietary name (INN) is an official generic and non-proprietary
6 | name given to a pharmaceutical drug or an active ingredient. (Russian)
7 | id: rumedmnn
8 | is_pii: 'False'
9 | langs:
10 | - ru
11 | name: International nonproprietary name (Russia)
12 | translations:
13 | ru:
14 | doc: Международное непатентованное наименование (МНН) — уникальное наименование
15 | действующего вещества лекарственного средства, рекомендованное Всемирной организацией
16 | здравоохранения (ВОЗ).
17 | name: Международное непатентованное наименование
18 | classification: identifier
19 | links: []
20 | examples: []
21 | wikidata_property: ''
22 | regexp: ''
23 |
--------------------------------------------------------------------------------
/data/datatypes/any/geo/iso6709.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | doc: Geographic point as string by ISO 6709. ISO 6709, Standard representation of
4 | geographic point location by coordinates, is the international standard for representation
5 | of latitude, longitude and altitude for geographic point locations.
6 | id: geopoint
7 | is_pii: 'False'
8 | langs:
9 | - common
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/ISO_6709
13 | name: ISO 6709 geopoint
14 | examples:
15 | - value: 50°40′46.461″N 95°48′26.533″W 123.45m
16 | description: Geopoint example 1
17 | - value: 50°03′46.461″S 125°48′26.533″E 978.90m
18 | description: Geopoint example 2
19 | classification: identifier
20 | wikidata_property: ''
21 | translations: {}
22 | regexp: ''
23 |
--------------------------------------------------------------------------------
/data/datatypes/any/chemistry/unclass.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - chemistry
3 | doc: UN hazard classification code
4 | id: unclass
5 | is_pii: 'False'
6 | langs:
7 | - common
8 | links:
9 | - type: wikidata
10 | url: https://www.wikidata.org/wiki/Property:P874
11 | name: UN Class
12 | wikidata_property: P874
13 | regexp: '(1\.[1-6]|[24]\.[123]|[56]\.[12]|[3789])'
14 | examples:
15 | - value: '6.1'
16 | description: sodium azide
17 | translations:
18 | ru:
19 | doc: 'Номер ООН, Номер UN или UN-идентификатор — четырёхзначное число, позволяющее определить опасность вещества или изделия (такого, например, как взрывчатое вещество, легковоспламеняющаяся жидкость, ядовитое вещество и др.)'
20 | name: Код класса опасности ООН
21 | classification: categorical
--------------------------------------------------------------------------------
/data/datatypes/any/transport/air/airport.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | - transport
4 | doc: 'A location identifier is a symbolic representation for the name and the location
5 | of an airport, navigation aid, or weather station, and is used for staffed air traffic
6 | control facilities in air traffic control, telecommunications, computer programming,
7 | weather reports, and related services. '
8 | id: airport
9 | is_pii: 'False'
10 | langs:
11 | - common
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/Location_identifier
15 | name: Airport location identifier
16 | patterns:
17 | - icaoairportcode
18 | - iataairportcode
19 | - faalid
20 | classification: identifier
21 | examples: []
22 | wikidata_property: ''
23 | translations: {}
24 | regexp: ''
25 |
--------------------------------------------------------------------------------
/data/datatypes/any/companies/opencorporatesid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - companies
3 | doc: 'Identifier for a corporation, in the OpenCorporates database. Format: country
4 | prefix, optional subnational entity abbreviation, "/", alphanumeric id'
5 | is_pii: 'False'
6 | id: opencorporatesid
7 | langs:
8 | - common
9 | links:
10 | - type: wikipedia
11 | url: https://en.wikipedia.org/wiki/OpenCorporates
12 | - type: wikidata
13 | url: https://www.wikidata.org/wiki/Property:P1320
14 | name: Open Corporates ID
15 | wikidata_property: P1320
16 | regexp: '[a-z]{2}(_[a-z]{2})?/[a-zA-ZÖÜÄ0-9\-_]*[0-9A-Za-f]'
17 | examples:
18 | - value: gb/SC129785
19 | description: Devro
20 | - value: mx/2682534
21 | description: Mexican Red Cross
22 | classification: identifier
23 | translations: {}
24 |
--------------------------------------------------------------------------------
/data/datatypes/any/cryptocurrency/p2pkhaddr.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - cryptocurrency
3 | doc: 'A Bitcoin address is only a hash, so the sender can''t provide a full public
4 | key in scriptPubKey. When redeeming coins that have been sent to a Bitcoin address,
5 | the recipient provides both the signature and the public key. The script verifies
6 | that the provided public key does hash to the hash in scriptPubKey, and then it
7 | also checks the signature against the public key. '
8 | id: p2pkhaddr
9 | is_pii: 'False'
10 | langs:
11 | - common
12 | links:
13 | - type: other
14 | url: https://allprivatekeys.com/bitcoin-address-format
15 | name: Pubkey hash (P2PKH address)
16 | classification: identifier
17 | examples: []
18 | wikidata_property: ''
19 | translations: {}
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/RU/companies/ru_okfs.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - common
3 | country:
4 | - RU
5 | doc: Russian classifier of forms of ownership, OKFS
6 | id: okfs
7 | is_pii: 'False'
8 | langs:
9 | - ru
10 | links:
11 | - type: other
12 | url: https://ru.wikipedia.org/wiki/Общероссийские_классификаторы_технико-экономической_и_социальной_информации
13 | name: Russian form of ownership code (OKFS)
14 | regexp: \d{2}
15 | examples:
16 | - value: '12'
17 | description: Федеральная собственность
18 | - value: '22'
19 | description: Собственность иностранных государств
20 | translations:
21 | ru:
22 | doc: ОКФС — это общероссийский классификатор форм собственности, входящий в Единую
23 | систему кодирования РФ
24 | name: Код ОКФС
25 | classification: categorical
26 | wikidata_property: ''
27 |
--------------------------------------------------------------------------------
/data/datatypes/any/cryptocurrency/bip32addr.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - cryptocurrency
3 | doc: BIP-32 mainly describes how to build a hierarchical deterministic
4 | wallet based on the BIP-39 features. What does that mean? BIP-32 explains exactly
5 | how the bitcoin master private key and the master chain code are created from
6 | the bitcoin seed delivered by BIP-39. The bitcoin seed generated by BIP-39 is the base
7 | for generating deterministic and hierarchical deterministic wallets.
8 | id: bip32addr
9 | is_pii: 'False'
10 | langs:
11 | - common
12 | links:
13 | - type: other
14 | url: https://allprivatekeys.com/bitcoin-address-format
15 | name: BIP-32 key
16 | classification: identifier
17 | examples: []
18 | wikidata_property: ''
19 | translations: {}
20 | regexp: ''
21 |
--------------------------------------------------------------------------------
/data/datatypes/any/internet/fqdn.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - internet
3 | doc: A domain name is an identification string that defines a realm of administrative
4 | autonomy, authority or control within the Internet.
5 | id: fqdn
6 | is_pii: 'False'
7 | langs:
8 | - common
9 | links:
10 | - type: wikipedia
11 | url: https://en.wikipedia.org/wiki/Fully_qualified_domain_name
12 | name: Fully qualified domain name (FQDN)
13 | regexp: ^(?!:\/\/)(?=.{1,255}$)((.{1,63}\.){1,127}(?![0-9]*$)[a-z0-9-]+\.?)$
14 | examples:
15 | - value: ec2-35-160-210-253.us-west-2.compute.amazonaws.com
16 | description: Amazon web services ec2 server
17 | - value: xn--kxae4bafwg.xn--pxaix.gr
18 | description: Greek language national domain name
19 | classification: identifier
20 | wikidata_property: ''
21 | translations: {}
22 |
--------------------------------------------------------------------------------
/data/datatypes/EU/companies/eu_vatin.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - companies
3 | country:
4 | - EU
5 | doc: A value-added tax identification number or VAT identification number (VATIN)
6 | is an identifier used in many countries, including the countries of the European
7 | Union, for value-added tax purposes.
8 | id: vatin
9 | is_pii: 'False'
10 | langs:
11 | - en
12 | links:
13 | - type: wikipedia
14 | url: https://en.wikipedia.org/wiki/VAT_identification_number
15 | - type: wikidata
16 | url: https://www.wikidata.org/wiki/Property:P3608
17 | name: EU VAT ID
18 | wikidata_property: P3608
19 | regexp: '[A-Z]{2}[A-Z\d]+'
20 | examples:
21 | - value: FI15243611
22 | description: Sanoma Corporation
23 | - value: BG200356710
24 | description: Ontotext
25 | classification: identifier
26 | translations: {}
27 |
--------------------------------------------------------------------------------
/data/datatypes/GB/companies/uk_companyhouseid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - companies
3 | doc: Numeric identifier for company registered with Companies House in the United
4 | Kingdom
5 | is_pii: 'False'
6 | langs:
7 | - en
8 | country:
9 | - GB
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/Companies_House
13 | - type: wikidata
14 | url: https://www.wikidata.org/wiki/Property:P2622
15 | id: companyhouseid
16 | name: Companies House company ID (UK)
17 | wikidata_property: P2622
18 | regexp: (AC|FC|GE|GN|GS|IC|IP|LP|NA|NF|NI|NL|NO|NP|NR|NZ|OC|R|RC|SA|SC|SF|SI|SL|SO|SP|SR|SZ|ZC|[0-9]{2})[0-9RS]{6}
19 | examples:
20 | - value: 02050843
21 | description: Aardman Animation
22 | - value: 00185647
23 | description: 'Sainsbury''s'
24 | classification: identifier
25 | translations: {}
26 |
--------------------------------------------------------------------------------
/data/datatypes/CA/finances/cagst.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - finances
3 | id: cagst
4 | is_pii: 'False'
5 | langs:
6 | - en
7 | name: GST/HST account number
8 | doc: Canadian GST/HST account number (cagst), made up of a 9-digit Business Number,
9 | the 'RT' program identifier and a 4-digit reference number.
10 | classification: identifier
11 | country:
12 | - CA
13 | links:
14 | - type: doc
15 | url: https://www.canada.ca/en/revenue-agency/services/tax/businesses/topics/gst-hst-businesses/registration/gst-hst-account-number.html
16 | examples:
17 | - value: '123456789RT0001'
18 | description: GST/HST account number with 9-digit Business Number, 'RT' program identifier and 4-digit reference
19 | - value: '987654321RT0002'
20 | description: Another valid GST/HST account number format
21 | wikidata_property: ''
22 | translations: {}
23 | regexp: ^[0-9]{9}RT[0-9]{4}$
24 |
--------------------------------------------------------------------------------
/data/datatypes/US/persons/us_dea_certificate.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - pii
3 | - persons
4 | - medical
5 | country:
6 | - US
7 | doc: A DEA number (DEA Registration Number) is an identifier assigned to a health
8 | care provider (such as a physician, physician assistant, nurse practitioner, optometrist,
9 | podiatrist, dentist, or veterinarian) by the United States Drug Enforcement Administration
10 | allowing them to write prescriptions for controlled substances.
11 | id: usdeanumber
12 | is_pii: 'True'
13 | langs:
14 | - en
15 | links:
16 | - type: wikipedia
17 | url: https://en.wikipedia.org/wiki/DEA_number
18 | regexp: '[a-zA-Z]{2}\d{7}|[a-zA-Z]{1}9\d{7}'
19 | name: USA DEA Registration (certificate) Number
20 | classification: identifier
21 | examples: []
22 | wikidata_property: ''
23 | translations: {}
24 |
--------------------------------------------------------------------------------
/data/datatypes/any/transport/air/flightnumber.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - transport
3 | doc: In the aviation industry, a flight number or flight designator is a code for
4 | an airline service consisting of two-character airline designator and a 1 to 4 digit
5 | number.
6 | id: flightnumber
7 | is_pii: 'False'
8 | langs:
9 | - common
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/Flight_number
13 | - type: wikidata
14 | url: https://www.wikidata.org/wiki/Property:P3090
15 | name: Flight number
16 | wikidata_property: P3090
17 | regexp: ([A-Z]{2,3}|[A-Z][0-9]|[0-9][A-Z])\d{1,4}[A-Z]?|\d{1,4}
18 | examples:
19 | - value: EK521
20 | description: Emirates Flight 521
21 | - value: 4U9525
22 | description: Germanwings Flight 9525
23 | classification: identifier
24 | translations: {}
25 |
--------------------------------------------------------------------------------
/data/tools/other/googledatastudio.yaml:
--------------------------------------------------------------------------------
1 | id: googledatastudio
2 | category: other
3 | name: Google Data Studio
4 | doc: 'With Data Studio, you can easily report on data from a wide variety of sources, without programming. In just a few moments, you can connect to data sets such as: Databases, including BigQuery, MySQL, and PostgreSQL. Google Marketing Platform products, including Google Ads, Analytics, Display & Video 360, Search Ads 360.'
5 | website: https://developers.google.com/datastudio/connector/semantics#semantic-type-detection
6 | supported_types:
7 | - geopoint
8 | - country
9 | - city
10 | - datetime
11 | - time
12 | - address
13 | - person_name
14 | - year
15 | - date
16 | - month
17 | - day
18 | - dayofweek
19 | - boolean
20 | - url
21 |
22 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/data/datatypes/any/transport/air/aircraftnumber.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - objectids
3 | - transport
4 | doc: An aircraft registration, alternatively called a tail number, is a code unique
5 | to a single aircraft, required by international convention to be marked on the exterior
6 | of every civil aircraft.
7 | id: aircraftnumber
8 | is_pii: 'False'
9 | langs:
10 | - common
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/Aircraft_registration
14 | - type: wikidata
15 | url: https://www.wikidata.org/wiki/Property:P426
16 | name: Aircraft/tail number
17 | wikidata_property: P426
18 | regexp: '[A-Z0-9- ]+'
19 | examples:
20 | - value: N-X-211
21 | description: Spirit of St. Louis
22 | - value: N736PA
23 | description: Clipper Victor
24 | classification: identifier
25 | translations: {}
26 |
--------------------------------------------------------------------------------
/data/datatypes/US/government/us_piid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - government
3 | country:
4 | - US
5 | doc: The PIID consists of a combination of thirteen to seventeen alpha and/or numeric
6 | characters sequenced to convey certain information
7 | id: uspiid
8 | is_pii: 'False'
9 | langs:
10 | - en
11 | links:
12 | - type: other
13 | url: https://www.acquisition.gov/sites/default/files/current/far/compiled_html/subpart_4.16.html
14 | - type: other
15 | url: https://www.fpds.gov/help/Procurement_Instrument_Identifier.htm
16 | name: Procurement Instrument Identifier (PIID)
17 | regexp: '[0-9A-Z]{13,17}'
18 | examples:
19 | - value: 19RS5022F0185
20 | description: PIID 19RS5022F0185
21 | - value: 19RS5021D0001
22 | description: PIID 19RS5021D0001
23 | classification: identifier
24 | wikidata_property: ''
25 | translations: {}
26 |
--------------------------------------------------------------------------------
/data/datatypes/any/chemistry/pubchemid.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - chemistry
3 | doc: PubChem is a database of chemical molecules and their activities against biological
4 | assays. The system is maintained by the National Center for Biotechnology Information
5 | (NCBI), a component of the National Library of Medicine, which is part of the United
6 | States National Institutes of Health (NIH).
7 | id: pubchemid
8 | is_pii: 'False'
9 | langs:
10 | - common
11 | links:
12 | - type: wikipedia
13 | url: https://en.wikipedia.org/wiki/PubChem
14 | - type: wikidata
15 | url: https://www.wikidata.org/wiki/Property:P2153
16 | name: PubChem Substance ID (SID)
17 | wikidata_property: P2153
18 | regexp: '[1-9]\d*'
19 | examples:
20 | - value: '135307179'
21 | description: zeolite Y
22 | classification: identifier
23 | translations: {}
24 |
--------------------------------------------------------------------------------
/data/datatypes/any/geo/countrycode_alpha3.yaml:
--------------------------------------------------------------------------------
1 | categories:
2 | - geo
3 | doc: ISO 3166-1 alpha-3 codes are three-letter country codes defined in ISO 3166-1,
4 | part of the ISO 3166 standard published by the International Organization for Standardization
5 | (ISO), to represent countries, dependent territories, and special areas of geographical
6 | interest.
7 | id: countrycode_alpha3
8 | langs:
9 | - common
10 | links:
11 | - type: wikipedia
12 | url: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3
13 | - type: wikidata
14 | url: https://www.wikidata.org/wiki/Property:P298
15 | name: ISO 3166-1 Alpha3 country code
16 | semantic_type: country
17 | regexp: '[A-Z]{3}'
18 | wikidata_property: P298
19 | examples:
20 | - value: RUS
21 | description: Russia
22 | - value: DEU
23 | description: Germany
24 | translations: {}
25 |
--------------------------------------------------------------------------------