├── scripts └── .gitkeep ├── .idea ├── .name ├── vcs.xml ├── inspectionProfiles │ └── profiles_settings.xml ├── metacrafter-registry.iml └── modules.xml ├── .gitignore ├── data ├── datahub │ └── bg.yaml ├── datatypes │ ├── any │ │ ├── datetime │ │ │ ├── age.yaml │ │ │ ├── en_month.yaml │ │ │ ├── time.yaml │ │ │ ├── en_dayofweek.yaml │ │ │ ├── en_monthshort.yaml │ │ │ ├── month.yaml │ │ │ ├── timerange.yaml │ │ │ ├── dayofmonth.yaml │ │ │ ├── monthday.yaml │ │ │ ├── yearmonth.yaml │ │ │ ├── year.yaml │ │ │ ├── dayofweek.yaml │ │ │ ├── deathday.yaml │ │ │ ├── quarter.yaml │ │ │ ├── date.yaml │ │ │ ├── timezone.yaml │ │ │ ├── datetime.yaml │ │ │ ├── utctimezoneoffiset.yaml │ │ │ ├── ianatimezoneid.yaml │ │ │ ├── duration.yaml │ │ │ ├── timespan.yaml │ │ │ ├── iso8601duration.yaml │ │ │ ├── unixtime.yaml │ │ │ └── birthday.yaml │ │ ├── internet │ │ │ ├── ipsubnet.yaml │ │ │ ├── ipaddr.yaml │ │ │ ├── eui48.yaml │ │ │ ├── ipv6subnet.yaml │ │ │ ├── asn.yaml │ │ │ ├── ipv4.yaml │ │ │ ├── ipv6.yaml │ │ │ ├── tld.yaml │ │ │ ├── ipv4subnet.yaml │ │ │ └── fqdn.yaml │ │ ├── values │ │ │ ├── percentage.yaml │ │ │ ├── income.yaml │ │ │ ├── quantity.yaml │ │ │ ├── price.yaml │ │ │ ├── money_amount.yaml │ │ │ ├── cost.yaml │ │ │ ├── discount.yaml │ │ │ ├── score.yaml │ │ │ ├── grossmargin.yaml │ │ │ ├── numeric.yaml │ │ │ └── share.yaml │ │ ├── geo │ │ │ ├── geopoint.yaml │ │ │ ├── street.yaml │ │ │ ├── en_countryname.yaml │ │ │ ├── iso3166code.yaml │ │ │ ├── geonamesid.yaml │ │ │ ├── continent.yaml │ │ │ ├── wbregion.yaml │ │ │ ├── longitude.yaml │ │ │ ├── latitude.yaml │ │ │ ├── address.yaml │ │ │ ├── postindex.yaml │ │ │ ├── what3words.yaml │ │ │ ├── unm49.yaml │ │ │ ├── city.yaml │ │ │ ├── iso6709.yaml │ │ │ └── countrycode_alpha3.yaml │ │ ├── persons │ │ │ ├── password.yaml │ │ │ ├── gender.yaml │ │ │ ├── person_midname.yaml │ │ │ ├── username.yaml │ │ │ ├── person_fullname.yaml │ │ │ ├── person_surname.yaml │ │ │ ├── person_firstname.yaml │ │ │ ├── nationality.yaml │ │ │ ├── 
en_nationality.yaml │ │ │ ├── passport.yaml │ │ │ └── jobtitle.yaml │ │ ├── transport │ │ │ ├── air │ │ │ │ ├── airlinename.yaml │ │ │ │ ├── airlinecallsign.yaml │ │ │ │ ├── airlinecode.yaml │ │ │ │ ├── iataairlinecode.yaml │ │ │ │ ├── airport.yaml │ │ │ │ ├── flightnumber.yaml │ │ │ │ └── aircraftnumber.yaml │ │ │ ├── vehicles │ │ │ │ └── vehiclenumber.yaml │ │ │ ├── railway │ │ │ │ └── uiccode.yaml │ │ │ └── maritime │ │ │ │ └── imonumber.yaml │ │ ├── pii │ │ │ ├── nhsnumber.yaml │ │ │ ├── ssn.yaml │ │ │ └── ninumber.yaml │ │ ├── finances │ │ │ ├── bic.yaml │ │ │ ├── clabe.yaml │ │ │ ├── ifsc.yaml │ │ │ ├── aba.yaml │ │ │ ├── uksortcode.yaml │ │ │ ├── catransit.yaml │ │ │ ├── iso4217curname.yaml │ │ │ ├── iso4217curcode.yaml │ │ │ ├── currency.yaml │ │ │ ├── tickersymbol.yaml │ │ │ ├── swiftcode.yaml │ │ │ └── iban.yaml │ │ ├── medical │ │ │ ├── npi.yaml │ │ │ ├── upin.yaml │ │ │ ├── cptcode.yaml │ │ │ ├── snomedcode.yaml │ │ │ └── gmcnumber.yaml │ │ ├── education │ │ │ ├── ukurn.yaml │ │ │ ├── ukprn.yaml │ │ │ ├── studentid.yaml │ │ │ └── ncesid.yaml │ │ ├── identifiers │ │ │ ├── id.yaml │ │ │ ├── guid.yaml │ │ │ ├── mongodbid.yaml │ │ │ ├── uuid.yaml │ │ │ └── dcid.yaml │ │ ├── common │ │ │ ├── category.yaml │ │ │ ├── name.yaml │ │ │ ├── hexcolor.yaml │ │ │ ├── color.yaml │ │ │ └── genre.yaml │ │ ├── environment │ │ │ ├── gridnode.yaml │ │ │ ├── powerplantid.yaml │ │ │ ├── waterstation.yaml │ │ │ └── emissioninventory.yaml │ │ ├── cryptocurrency │ │ │ ├── p2wshaddr.yaml │ │ │ ├── p2wpkhaddr.yaml │ │ │ ├── wifaddrcomp.yaml │ │ │ ├── wifaddruncomp.yaml │ │ │ ├── p2shaddr.yaml │ │ │ ├── p2pkhaddr.yaml │ │ │ └── bip32addr.yaml │ │ ├── cryptography │ │ │ ├── sha1hash.yaml │ │ │ ├── sha256hash.yaml │ │ │ ├── md5hash.yaml │ │ │ ├── sct.yaml │ │ │ └── crc32.yaml │ │ ├── texts │ │ │ ├── headline.yaml │ │ │ ├── description.yaml │ │ │ └── comment.yaml │ │ ├── shipping │ │ │ ├── tracknum.yaml │ │ │ ├── s10upu.yaml │ │ │ └── unpackaginggroup.yaml │ │ ├── science │ │ │ ├── 
scopusauthoirid.yaml │ │ │ ├── researcherid.yaml │ │ │ ├── openalexid.yaml │ │ │ ├── academicdegree.yaml │ │ │ └── doiprefix.yaml │ │ ├── dublincore │ │ │ ├── dclanguage.yaml │ │ │ ├── dctitle.yaml │ │ │ └── dcabstract.yaml │ │ ├── software │ │ │ ├── programminglang.yaml │ │ │ ├── filesize.yaml │ │ │ ├── imphash.yaml │ │ │ ├── datasize.yaml │ │ │ ├── telfhash.yaml │ │ │ ├── tlsh.yaml │ │ │ ├── vhash.yaml │ │ │ ├── ssdeep.yaml │ │ │ ├── richpeheader.yaml │ │ │ └── authentihash.yaml │ │ ├── useraccounts │ │ │ ├── twitter_username.yaml │ │ │ ├── skype_username.yaml │ │ │ ├── github_username.yaml │ │ │ └── instagram_username.yaml │ │ ├── companies │ │ │ ├── iso6523.yaml │ │ │ ├── en_companyname.yaml │ │ │ ├── orgname.yaml │ │ │ └── opencorporatesid.yaml │ │ ├── telecom │ │ │ ├── imsi.yaml │ │ │ ├── imei.yaml │ │ │ └── msisdn.yaml │ │ ├── government │ │ │ ├── legislationname.yaml │ │ │ └── legislationtype.yaml │ │ ├── objectids │ │ │ ├── wikidataid.yaml │ │ │ ├── openlibraryid.yaml │ │ │ └── viafid.yaml │ │ ├── files │ │ │ └── filename.yaml │ │ ├── industry │ │ │ └── isicrev4.yaml │ │ └── chemistry │ │ │ ├── unclass.yaml │ │ │ └── pubchemid.yaml │ ├── US │ │ ├── geo │ │ │ ├── us_city.yaml │ │ │ ├── us_county.yaml │ │ │ ├── us_state.yaml │ │ │ ├── us_fips52.yaml │ │ │ ├── us_zipcode.yaml │ │ │ └── us_fips64.yaml │ │ ├── persons │ │ │ ├── us_passport.yaml │ │ │ ├── us_npi.yaml │ │ │ ├── us_ptin.yaml │ │ │ ├── us_itin.yaml │ │ │ ├── us_ssn.yaml │ │ │ ├── us_ein.yaml │ │ │ ├── us_atin.yaml │ │ │ └── us_dea_certificate.yaml │ │ ├── industry │ │ │ ├── us_naicscode.yaml │ │ │ └── us_soc.yaml │ │ ├── telecom │ │ │ └── fccid.yaml │ │ ├── finances │ │ │ ├── us_bankaccount.yaml │ │ │ ├── us_aba_routing.yaml │ │ │ └── us_cusip.yaml │ │ └── government │ │ │ └── us_piid.yaml │ ├── RU │ │ ├── geo │ │ │ ├── ru_city.yaml │ │ │ ├── ru_street.yaml │ │ │ ├── ru_countryname.yaml │ │ │ ├── ru_postalcode.yaml │ │ │ ├── ru_regioncode.yaml │ │ │ ├── ru_regionname.yaml │ │ │ ├── 
ru_mosdistricts.yaml │ │ │ ├── ru_mosadmareas.yaml │ │ │ ├── ru_feddistrict.yaml │ │ │ ├── ru_region.yaml │ │ │ ├── ru_kadastr.yaml │ │ │ └── ru_kladr.yaml │ │ ├── datetime │ │ │ ├── ru_timerange.yaml │ │ │ └── ru_dayofweek.yaml │ │ ├── persons │ │ │ ├── ru_degree.yaml │ │ │ ├── ru_midname.yaml │ │ │ ├── ru_firstname.yaml │ │ │ ├── ru_surname.yaml │ │ │ ├── ru_fullname.yaml │ │ │ ├── ru_workposition.yaml │ │ │ ├── ru_passport.yaml │ │ │ ├── ru_intpassport.yaml │ │ │ └── ru_snils.yaml │ │ ├── companies │ │ │ ├── ru_companyname.yaml │ │ │ ├── ru_okogu.yaml │ │ │ ├── ru_okopfname.yaml │ │ │ ├── ru_ikuiko.yaml │ │ │ ├── ru_ikz.yaml │ │ │ ├── ru_okopf.yaml │ │ │ ├── ru_inn.yaml │ │ │ ├── ru_kpp.yaml │ │ │ ├── ru_okpo.yaml │ │ │ ├── ru_ogrn_ogrnip.yaml │ │ │ └── ru_okfs.yaml │ │ ├── government │ │ │ ├── ru_rnfi.yaml │ │ │ ├── ru_csrcode.yaml │ │ │ ├── ru_npa.yaml │ │ │ ├── ru_budgetname.yaml │ │ │ ├── ru_npakind.yaml │ │ │ ├── ru_kosgucode.yaml │ │ │ ├── ru_kvrcode.yaml │ │ │ ├── ru_budgetcode.yaml │ │ │ ├── ru_tofkcode.yaml │ │ │ ├── ru_tofkname.yaml │ │ │ ├── ru_ppocode.yaml │ │ │ ├── ru_kvrname.yaml │ │ │ ├── ru_pponame.yaml │ │ │ ├── ru_fedgrbs.yaml │ │ │ └── ru_kbk.yaml │ │ ├── finances │ │ │ ├── ru_okvalpha.yaml │ │ │ ├── ru_okvname.yaml │ │ │ └── ru_sbankaccount.yaml │ │ ├── industry │ │ │ ├── ru_okved.yaml │ │ │ └── ru_okpd.yaml │ │ └── medical │ │ │ ├── ru_medicinetradename.yaml │ │ │ ├── ru_medicineregnum.yaml │ │ │ └── ru_medmnn.yaml │ ├── AT │ │ └── geo │ │ │ └── at_city.yaml │ ├── FR │ │ ├── persons │ │ │ ├── fr_fullname.yaml │ │ │ ├── fr_nir.yaml │ │ │ ├── fr_passport.yaml │ │ │ └── fr_cni.yaml │ │ └── geo │ │ │ ├── fr_epcicode.yaml │ │ │ ├── fr_postcode.yaml │ │ │ ├── fr_siretcode.yaml │ │ │ └── fr_inseecode.yaml │ ├── NL │ │ └── companies │ │ │ └── rsin.yaml │ ├── AU │ │ ├── persons │ │ │ ├── au_passport.yaml │ │ │ ├── au_tfn_number.yaml │ │ │ └── au_medicare.yaml │ │ ├── finances │ │ │ └── au_bsb.yaml │ │ └── companies │ │ │ ├── au_acn.yaml │ │ │ └── 
au_abn.yaml │ ├── MX │ │ └── geo │ │ │ ├── mx_state.yaml │ │ │ └── mx_iso3166_2.yaml │ ├── CA │ │ ├── geo │ │ │ ├── ca_province copy.yaml │ │ │ └── ca_province.yaml │ │ ├── persons │ │ │ ├── ca_passport.yaml │ │ │ ├── ca_on_ohip.yaml │ │ │ ├── ca_driver_license.yaml │ │ │ ├── ca_sin.yaml │ │ │ └── ca_bc_ptn.yaml │ │ ├── companies │ │ │ ├── cacorp.yaml │ │ │ └── cabizlic.yaml │ │ └── finances │ │ │ ├── cacharity.yaml │ │ │ ├── cabn.yaml │ │ │ └── cagst.yaml │ ├── GB │ │ ├── persons │ │ │ ├── uk_passport.yaml │ │ │ ├── uk_utr.yaml │ │ │ ├── uk_nino.yaml │ │ │ ├── uk_driver_license.yaml │ │ │ └── uk_nhs_number.yaml │ │ ├── geo │ │ │ ├── uk_postalcode.yaml │ │ │ ├── uk_wardcode.yaml │ │ │ ├── uk_wardname.yaml │ │ │ ├── uk_toid.yaml │ │ │ └── uk_uprn.yaml │ │ ├── finances │ │ │ └── uk_sedol.yaml │ │ ├── medical │ │ │ └── uk_bnfcode.yaml │ │ └── companies │ │ │ └── uk_companyhouseid.yaml │ ├── TH │ │ └── persons │ │ │ └── th_idcard.yaml │ ├── ES │ │ ├── persons │ │ │ ├── es_driver_license.yaml │ │ │ ├── es_passport.yaml │ │ │ ├── es_nif_number.yaml │ │ │ └── es_nie_number.yaml │ │ └── geo │ │ │ └── es_postcode.yaml │ ├── DE │ │ ├── persons │ │ │ ├── de_driver_license.yaml │ │ │ └── de_personalausweis.yaml │ │ ├── medical │ │ │ └── opscode.yaml │ │ └── finances │ │ │ └── handelsregisternr.yaml │ ├── SE │ │ └── persons │ │ │ ├── se_passport.yaml │ │ │ └── se_personnumer.yaml │ ├── BR │ │ └── persons │ │ │ └── br_cpf.yaml │ ├── FI │ │ └── persons │ │ │ └── fi_natid.yaml │ ├── BE │ │ └── persons │ │ │ └── be_natcardnum.yaml │ ├── SG │ │ └── persons │ │ │ └── sg_nric.yaml │ ├── EU │ │ ├── industry │ │ │ └── eu_cpvcode.yaml │ │ ├── geo │ │ │ └── eu_nuts.yaml │ │ ├── transport │ │ │ ├── eu_cin.yaml │ │ │ └── eu_eninumber.yaml │ │ └── companies │ │ │ └── eu_vatin.yaml │ ├── AR │ │ └── persons │ │ │ └── ar_dni.yaml │ └── DK │ │ └── persons │ │ └── dk_cpr.yaml ├── tools │ ├── pii │ │ ├── piicatcher.yaml │ │ ├── scrubadub.yaml │ │ ├── ibm-guardium.yaml │ │ └── 
metadata-guardian.yaml │ └── other │ │ ├── soda.yaml │ │ ├── auctus.yaml │ │ ├── metabase.yaml │ │ └── googledatastudio.yaml ├── langs.yaml └── schemes │ └── tool.json ├── _original ├── countries.csv ├── languages.csv └── categories.csv ├── requirements.txt └── analysis ├── sherlock └── sherlock_datatypes.xlsx └── README.md /scripts/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.idea/.name: -------------------------------------------------------------------------------- 1 | us_dea_certificate.yaml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _site/ 2 | .sass-cache/ 3 | .jekyll-cache/ 4 | .jekyll-metadata 5 | -------------------------------------------------------------------------------- /data/datahub/bg.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | type: datahub-business-glossary 3 | config: 4 | file: metacrafter.yml 5 | -------------------------------------------------------------------------------- /_original/countries.csv: -------------------------------------------------------------------------------- 1 | RU Russian Federation 2 | GB United Kingdom 3 | US United States 4 | FR France 5 | ES Spain 6 | MX Mexico -------------------------------------------------------------------------------- /_original/languages.csv: -------------------------------------------------------------------------------- 1 | ru Russian 2 | common Common to most languages 3 | en English 4 | fr French 5 | es Spanish 6 | it Italian -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Core dependencies for metacrafter-registry 2 | PyYAML>=6.0 3 
| cerberus>=1.3.4 4 | typer>=0.9.0 5 | flask>=2.3.0 6 | 7 | -------------------------------------------------------------------------------- /analysis/sherlock/sherlock_datatypes.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apicrafter/metacrafter-registry/HEAD/analysis/sherlock/sherlock_datatypes.xlsx -------------------------------------------------------------------------------- /analysis/README.md: -------------------------------------------------------------------------------- 1 | # Analysis 2 | 3 | Review and analysis of existing metadata/semantic types tools. 4 | 5 | Someday will be moved to another repository. 6 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/age.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: Age of the person or organization in years 4 | id: age 5 | is_pii: 'True' 6 | langs: 7 | - common 8 | name: Age 9 | links: [] 10 | examples: [] 11 | wikidata_property: '' 12 | translations: {} 13 | regexp: '' 14 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/en_month.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: English name of the month 4 | id: enmonth 5 | langs: 6 | - en 7 | name: Month name (English) 8 | semantic_type: month 9 | links: [] 10 | examples: [] 11 | 
wikidata_property: '' 12 | translations: {} 13 | regexp: '' 14 | -------------------------------------------------------------------------------- /data/datatypes/US/geo/us_city.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - US 5 | doc: United States city name 6 | id: uscity 7 | langs: 8 | - en 9 | name: United States city 10 | semantic_type: city 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/time.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: Time of the day, like 12:00 or 10PM or 11:05 or 18:01:27 4 | id: time 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | name: Time 9 | links: [] 10 | examples: [] 11 | wikidata_property: '' 12 | translations: {} 13 | regexp: '' 14 | -------------------------------------------------------------------------------- /data/datatypes/any/internet/ipsubnet.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - internet 3 | doc: Subnetwork of IPv4 or IPv6 addresses 4 | id: ipsubnet 5 | is_pii: 'True' 6 | langs: 7 | - common 8 | name: IPv4 or IPv6 subnet 9 | links: [] 10 | examples: [] 11 | wikidata_property: '' 12 | translations: {} 13 | regexp: '' 14 | -------------------------------------------------------------------------------- /data/datatypes/any/values/percentage.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - values 3 | - common 4 | doc: Percentage float number 5 | id: percentage 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | name: Percentage value 10 | links: [] 11 | examples: [] 12 | wikidata_property: '' 13 | translations: {} 14 | regexp: '' 15 | 
-------------------------------------------------------------------------------- /data/datatypes/RU/geo/ru_city.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - RU 5 | doc: City name in Russian language 6 | id: ruscity 7 | langs: 8 | - ru 9 | name: City name (Russian) 10 | semantic_type: city 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/geo/geopoint.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | doc: Basic geopoint as coordinates 4 | id: geopoint 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | name: Geopoint 9 | classification: identifier 10 | links: [] 11 | examples: [] 12 | wikidata_property: '' 13 | translations: {} 14 | regexp: '' 15 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/en_dayofweek.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: Day of week written in English 4 | id: endayofweek 5 | langs: 6 | - en 7 | name: Day of week (in English) 8 | semantic_type: dayofweek 9 | links: [] 10 | examples: [] 11 | wikidata_property: '' 12 | translations: {} 13 | regexp: '' 14 | -------------------------------------------------------------------------------- /data/datatypes/any/persons/password.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - common 3 | - pii 4 | doc: Passwords, usually associated with login/username/email. 
5 | id: password 6 | is_pii: 'True' 7 | langs: 8 | - common 9 | name: Password 10 | links: [] 11 | examples: [] 12 | wikidata_property: '' 13 | translations: {} 14 | regexp: '' 15 | -------------------------------------------------------------------------------- /data/datatypes/AT/geo/at_city.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - AT 5 | doc: Austrian city name in German language 6 | id: atcity 7 | langs: 8 | - de 9 | name: Austrian city name (in German) 10 | semantic_type: city 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/RU/geo/ru_street.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - RU 5 | doc: Street name written in Russian language 6 | id: rustreet 7 | langs: 8 | - ru 9 | name: Street name (Russian) 10 | semantic_type: street 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/US/geo/us_county.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - US 5 | doc: Name or code of US County 6 | id: uscounty 7 | is_pii: 'False' 8 | langs: 9 | - en 10 | name: US County 11 | classification: categorical 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/US/geo/us_state.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - US 5 | doc: Name or code of US State 6 | id: usstate 7 | is_pii: 'False' 8 | 
langs: 9 | - en 10 | name: US State 11 | classification: categorical 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/any/values/income.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - values 3 | - common 4 | doc: Income, sub-type of money 5 | id: income 6 | is_pii: 'False' 7 | parent: 8 | type: money 9 | langs: 10 | - common 11 | name: Income (money) 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/any/transport/air/airlinename.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - transport 3 | doc: Unique name of the airline 4 | id: airlinename 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | name: Airline name 9 | classification: identifier 10 | links: [] 11 | examples: [] 12 | wikidata_property: '' 13 | translations: {} 14 | regexp: '' 15 | -------------------------------------------------------------------------------- /data/datatypes/any/values/quantity.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - values 3 | - common 4 | doc: Number of something, quantity 5 | id: quantity 6 | is_pii: 'False' 7 | parent: 8 | type: numeric 9 | langs: 10 | - common 11 | name: Quantity 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/RU/datetime/ru_timerange.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | country: 4 | - RU 5 | doc: Time range written in Russian 6 | id: rustimerange 7 | 
langs: 8 | - ru 9 | name: Time range (Russian) 10 | semantic_type: timerange 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/values/price.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - values 3 | - common 4 | doc: Price of something, sub-type of money 5 | id: price 6 | is_pii: 'False' 7 | parent: 8 | type: money 9 | langs: 10 | - common 11 | name: Price (money) 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/RU/persons/ru_degree.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - persons 3 | country: 4 | - RU 5 | doc: Academic degree title in Russian. 6 | id: rusdegree 7 | langs: 8 | - ru 9 | name: Academic degree (Russian) 10 | semantic_type: academicdegree 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/RU/persons/ru_midname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - RU 6 | doc: Middle name in Russian 7 | id: rusmidname 8 | langs: 9 | - ru 10 | name: Middle name in Russian 11 | semantic_type: person_midname 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/en_monthshort.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: Abbreviated short English 
name/abbreviation of month 4 | id: enmonthshort 5 | langs: 6 | - en 7 | name: Short name of month (English) 8 | semantic_type: month 9 | links: [] 10 | examples: [] 11 | wikidata_property: '' 12 | translations: {} 13 | regexp: '' 14 | -------------------------------------------------------------------------------- /data/datatypes/any/persons/gender.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | doc: Gender flag and/or any other identification value 5 | id: gender 6 | is_pii: 'True' 7 | langs: 8 | - common 9 | name: Gender 10 | classification: categorical 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/values/money_amount.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - values 3 | - common 4 | doc: Amount of money as numeric value 5 | id: money 6 | is_pii: 'False' 7 | parent: 8 | type: numeric 9 | langs: 10 | - common 11 | name: Money amount 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/RU/persons/ru_firstname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - RU 6 | doc: First name in Russian 7 | id: rusfirstname 8 | langs: 9 | - ru 10 | name: First name in Russian 11 | semantic_type: person_firstname 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/any/persons/person_midname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 
| - persons 3 | - pii 4 | doc: Middle name of the person 5 | id: person_midname 6 | is_pii: 'True' 7 | langs: 8 | - common 9 | name: Person middle name 10 | patterns: 11 | - rusmidname 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/any/pii/nhsnumber.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | id: nhsnumber 4 | is_pii: 'True' 5 | langs: 6 | - en 7 | name: NHS number 8 | doc: Datatype for 'NHS number' (nhsnumber) from rules in context 'pii'. 9 | classification: identifier 10 | links: [] 11 | examples: [] 12 | wikidata_property: '' 13 | translations: {} 14 | regexp: '' 15 | -------------------------------------------------------------------------------- /data/datatypes/any/values/cost.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - values 3 | - common 4 | doc: Cost of something as sub-type of money 5 | id: cost 6 | is_pii: 'False' 7 | parent: 8 | type: money 9 | langs: 10 | - common 11 | name: Cost of something (money) 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/any/values/discount.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - values 3 | - common 4 | doc: Discount from price sub-type of money 5 | id: discount 6 | is_pii: 'False' 7 | parent: 8 | type: money 9 | langs: 10 | - common 11 | name: Discount (money) 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/any/values/score.yaml: 
-------------------------------------------------------------------------------- 1 | categories: 2 | - values 3 | - common 4 | doc: Score of something as numeric/float number 5 | id: score 6 | is_pii: 'False' 7 | parent: 8 | type: numeric 9 | langs: 10 | - common 11 | name: Score (numeric) 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/RU/datetime/ru_dayofweek.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | country: 4 | - RU 5 | doc: Day of week written in Russian language 6 | id: rusdayofweek 7 | langs: 8 | - ru 9 | name: Day of week (in Russian) 10 | semantic_type: dayofweek 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/RU/geo/ru_countryname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - RU 5 | doc: Name of the country written in Russian language. 
6 | id: ruscountryname 7 | langs: 8 | - ru 9 | name: Country name (Russian) 10 | semantic_type: country 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/RU/geo/ru_postalcode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - RU 5 | doc: 6 digits postal codes used in Russian Federation 6 | id: ruspostalcode 7 | langs: 8 | - common 9 | name: Russian postal code 10 | semantic_type: postindex 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/RU/persons/ru_surname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - RU 6 | doc: Surname/Lastname in Russian 7 | id: russurname 8 | langs: 9 | - ru 10 | name: Surname/Lastname in Russian 11 | semantic_type: person_surname 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/any/transport/air/airlinecallsign.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - transport 3 | doc: Unique call sign of the airline 4 | id: airlinecallsign 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | name: Airline call sign 9 | classification: identifier 10 | links: [] 11 | examples: [] 12 | wikidata_property: '' 13 | translations: {} 14 | regexp: '' 15 | -------------------------------------------------------------------------------- /data/datatypes/any/values/grossmargin.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - 
values 3 | - common 4 | doc: Gross margin, sub-type of money 5 | id: grossmargin 6 | is_pii: 'False' 7 | parent: 8 | type: money 9 | langs: 10 | - common 11 | name: Gross margin (money) 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/any/finances/bic.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | id: bic 4 | is_pii: 'False' 5 | langs: 6 | - common 7 | name: SWIFT/BIC code 8 | doc: Datatype for 'SWIFT/BIC code' (bic) from rules in context 'finances'. 9 | classification: identifier 10 | links: [] 11 | examples: [] 12 | wikidata_property: '' 13 | translations: {} 14 | regexp: '' 15 | -------------------------------------------------------------------------------- /data/datatypes/any/medical/npi.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - medical 3 | id: npi 4 | is_pii: 'True' 5 | langs: 6 | - en 7 | name: US NPI identifier 8 | doc: Datatype for 'US NPI identifier' (npi) from rules in context 'medical'. 
9 | classification: identifier 10 | links: [] 11 | examples: [] 12 | wikidata_property: '' 13 | translations: {} 14 | regexp: '' 15 | -------------------------------------------------------------------------------- /data/datatypes/any/values/numeric.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - values 3 | - common 4 | doc: Any integer or float number 5 | id: numeric 6 | is_pii: 'False' 7 | links: 8 | - type: schema.org 9 | url: https://schema.org/Number 10 | langs: 11 | - common 12 | name: Numeric value 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/RU/companies/ru_companyname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - companies 3 | country: 4 | - RU 5 | doc: Russian written company/business name 6 | id: rucompanyname 7 | langs: 8 | - ru 9 | name: Company/business name in Russian 10 | semantic_type: orgname 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/month.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: Month as number since start of the year. 
Could be from 1 to 12 4 | id: month 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | name: Month 9 | patterns: 10 | - enmonth 11 | - enmonthshort 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/any/education/ukurn.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - education 3 | id: ukurn 4 | is_pii: 'False' 5 | langs: 6 | - en 7 | name: UK school URN 8 | doc: Datatype for 'UK school URN' (ukurn) from rules in context 'education'. 9 | classification: identifier 10 | links: [] 11 | examples: [] 12 | wikidata_property: '' 13 | translations: {} 14 | regexp: '' 15 | -------------------------------------------------------------------------------- /data/datatypes/any/finances/clabe.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | id: clabe 4 | is_pii: 'False' 5 | langs: 6 | - common 7 | name: Mexican CLABE 8 | doc: Datatype for 'Mexican CLABE' (clabe) from rules in context 'finances'. 9 | classification: identifier 10 | links: [] 11 | examples: [] 12 | wikidata_property: '' 13 | translations: {} 14 | regexp: '' 15 | -------------------------------------------------------------------------------- /data/datatypes/any/finances/ifsc.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | id: ifsc 4 | is_pii: 'False' 5 | langs: 6 | - common 7 | name: Indian IFSC code 8 | doc: Datatype for 'Indian IFSC code' (ifsc) from rules in context 'finances'. 
9 | classification: identifier 10 | links: [] 11 | examples: [] 12 | wikidata_property: '' 13 | translations: {} 14 | regexp: '' 15 | -------------------------------------------------------------------------------- /data/datatypes/any/geo/street.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | - pii 4 | - common 5 | doc: Name of the street, commonly used in address 6 | id: street 7 | is_pii: 'True' 8 | langs: 9 | - common 10 | name: Street 11 | patterns: 12 | - rustreet 13 | - russtreet 14 | links: [] 15 | examples: [] 16 | wikidata_property: '' 17 | translations: {} 18 | regexp: '' 19 | -------------------------------------------------------------------------------- /data/datatypes/any/identifiers/id.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - identifiers 3 | doc: A unique identifier, local for DB/dataset or global and persistent 4 | id: id 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | name: Unique identifier 9 | classification: identifier 10 | links: [] 11 | examples: [] 12 | wikidata_property: '' 13 | translations: {} 14 | regexp: '' 15 | -------------------------------------------------------------------------------- /data/datatypes/any/transport/vehicles/vehiclenumber.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - transport 3 | doc: Number of the vehicle (vehicle plate number) 4 | id: vehiclenumber 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | name: Vehicle number 9 | classification: identifier 10 | links: [] 11 | examples: [] 12 | wikidata_property: '' 13 | translations: {} 14 | regexp: '' 15 | -------------------------------------------------------------------------------- /data/datatypes/RU/persons/ru_fullname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - RU 6 | 
doc: Fullname of the person in Russian 7 | id: rusfullname 8 | langs: 9 | - ru 10 | name: Fullname of the person in Russian 11 | semantic_type: person_fullname 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/timerange.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: Interval between two points of time, like 8:00-21:00 or 8:00AM-1:00PM. 4 | id: timerange 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | name: Time range 9 | patterns: 10 | - rustimerange 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/medical/upin.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - medical 3 | id: upin 4 | is_pii: 'True' 5 | langs: 6 | - en 7 | name: US UPIN identifier 8 | doc: Datatype for 'US UPIN identifier' (upin) from rules in context 9 | 'medical'. 10 | classification: identifier 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/pii/ssn.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | id: ssn 4 | is_pii: 'True' 5 | langs: 6 | - en 7 | name: US Social Security Number 8 | doc: Datatype for 'US Social Security Number' (ssn) from rules in context 9 | 'pii'. 
10 | classification: identifier 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/FR/persons/fr_fullname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - FR 6 | doc: Full name of the person written in French 7 | id: frfullname 8 | langs: 9 | - fr 10 | name: French full name of the person 11 | semantic_type: person_fullname 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/RU/geo/ru_regioncode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - RU 5 | doc: Russian unique code of country subdivision (regions/federal subjects) 6 | id: rusregioncode 7 | langs: 8 | - ru 9 | name: Russian region unique code 10 | semantic_type: rusregion 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/education/ukprn.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - education 3 | id: ukprn 4 | is_pii: 'False' 5 | langs: 6 | - en 7 | name: UK provider UKPRN 8 | doc: Datatype for 'UK provider UKPRN' (ukprn) from rules in context 9 | 'education'. 
10 | classification: identifier 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/finances/aba.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | id: aba 4 | is_pii: 'False' 5 | langs: 6 | - common 7 | name: US ABA routing number 8 | doc: Datatype for 'US ABA routing number' (aba) from rules in context 9 | 'finances'. 10 | classification: identifier 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/finances/uksortcode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | id: uksortcode 4 | is_pii: 'False' 5 | langs: 6 | - common 7 | name: UK sort code 8 | doc: Datatype for 'UK sort code' (uksortcode) from rules in context 9 | 'finances'. 10 | classification: identifier 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/NL/companies/rsin.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - companies 3 | id: rsin 4 | is_pii: 'False' 5 | langs: 6 | - nl 7 | name: RSIN op veldnaam 8 | doc: NL datatype for 'RSIN op veldnaam' (rsin) from rules in context 'companies'. 
9 | classification: identifier 10 | country: 11 | - NL 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/any/common/category.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - common 3 | doc: Object or entity category 4 | id: category 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | name: Category 9 | translations: 10 | ru: 11 | doc: Категория объекта/сущности 12 | name: Категория 13 | classification: categorical 14 | links: [] 15 | examples: [] 16 | wikidata_property: '' 17 | regexp: '' 18 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/dayofmonth.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: Month day, numeric value from 1 to 31 4 | id: dayofmonth 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | name: Day of month 9 | translations: 10 | ru: 11 | doc: День месяца в значении от 1 до 31 12 | name: День месяца 13 | links: [] 14 | examples: [] 15 | wikidata_property: '' 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/any/education/studentid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - education 3 | id: studentid 4 | is_pii: 'True' 5 | langs: 6 | - en 7 | name: Student identifier 8 | doc: Datatype for 'Student identifier' (studentid) from rules in context 9 | 'education'. 
10 | classification: identifier 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/medical/cptcode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - medical 3 | id: cptcode 4 | is_pii: 'False' 5 | langs: 6 | - en 7 | name: CPT/HCPCS procedure code 8 | doc: Datatype for 'CPT/HCPCS procedure code' (cptcode) from rules in 9 | context 'medical'. 10 | classification: identifier 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/pii/ninumber.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | id: ninumber 4 | is_pii: 'True' 5 | langs: 6 | - en 7 | name: UK National Insurance Number 8 | doc: Datatype for 'UK National Insurance Number' (ninumber) from rules 9 | in context 'pii'. 
10 | classification: identifier 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/RU/government/ru_rnfi.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | country: 4 | - RU 5 | doc: Russian government federal property unique ID (RNFI) 6 | id: rnfi 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | name: Russian government property ID (RNFI) 11 | classification: identifier 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/any/education/ncesid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - education 3 | id: ncesid 4 | is_pii: 'False' 5 | langs: 6 | - en 7 | name: US NCES school identifier 8 | doc: Datatype for 'US NCES school identifier' (ncesid) from rules in 9 | context 'education'. 
10 | classification: identifier 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/internet/ipaddr.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - internet 3 | - pii 4 | doc: Internet Protocol version 4 (IPv4) or Internet Protocol version 6 (IPv6) address 5 | id: ipaddr 6 | is_pii: 'True' 7 | langs: 8 | - common 9 | name: IPv4 or IPv6 address 10 | classification: identifier 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/medical/snomedcode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - medical 3 | id: snomedcode 4 | is_pii: 'False' 5 | langs: 6 | - en 7 | name: SNOMED CT concept code 8 | doc: Datatype for 'SNOMED CT concept code' (snomedcode) from rules in 9 | context 'medical'. 10 | classification: identifier 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/values/share.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - values 3 | - common 4 | doc: Share of the part as percentage as float number. Always between 0 and 100 percents. 
5 | id: share 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | name: Share of value 10 | parent: 11 | type: percentage 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/RU/geo/ru_regionname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - RU 5 | doc: Name of the region (federal subject) in Russia as it is in country Constitution 6 | id: rusregionname 7 | langs: 8 | - ru 9 | name: Russian region (federal subject) name 10 | semantic_type: rusregion 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/environment/gridnode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - environment 3 | id: gridnode 4 | is_pii: 'False' 5 | langs: 6 | - en 7 | name: Grid node identifier 8 | doc: Datatype for 'Grid node identifier' (gridnode) from rules in context 9 | 'environment'. 10 | classification: identifier 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/medical/gmcnumber.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - medical 3 | id: gmcnumber 4 | is_pii: 'True' 5 | langs: 6 | - en 7 | name: UK GMC registration number 8 | doc: Datatype for 'UK GMC registration number' (gmcnumber) from rules 9 | in context 'medical'. 
10 | classification: identifier 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/persons/username.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | doc: Name of the user or login, commonly used for authentication or user identification 5 | purposes 6 | id: username 7 | is_pii: 'True' 8 | langs: 9 | - common 10 | name: Username 11 | classification: identifier 12 | links: [] 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/RU/persons/ru_workposition.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - persons 3 | country: 4 | - RU 5 | doc: Work positions / job title common in Russia and written in Russian language. 
6 | id: rusworkposition 7 | langs: 8 | - ru 9 | name: Work position / Job title (Russian) 10 | semantic_type: jobtitle 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/cryptocurrency/p2wshaddr.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - cryptocurrency 3 | id: p2wshaddr 4 | is_pii: 'False' 5 | langs: 6 | - common 7 | links: 8 | - type: other 9 | url: https://allprivatekeys.com/bitcoin-address-format 10 | name: P2WSH address 11 | doc: P2WSH address 12 | classification: identifier 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/any/cryptocurrency/p2wpkhaddr.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - cryptocurrency 3 | id: p2wpkhaddr 4 | is_pii: 'False' 5 | langs: 6 | - common 7 | links: 8 | - type: other 9 | url: https://allprivatekeys.com/bitcoin-address-format 10 | name: P2WPKH address 11 | doc: P2WPKH address 12 | classification: identifier 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/any/cryptography/sha1hash.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - cryptography 3 | doc: SHA-1 hash from file or data. 
Sometimes used as unique id of the data record 4 | id: sha1hash 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: wikipedia 10 | url: https://en.wikipedia.org/wiki/SHA-1 11 | name: SHA-1 hash 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/environment/powerplantid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - environment 3 | id: powerplantid 4 | is_pii: 'False' 5 | langs: 6 | - en 7 | name: Power plant identifier 8 | doc: Datatype for 'Power plant identifier' (powerplantid) from rules 9 | in context 'environment'. 10 | classification: identifier 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/finances/catransit.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | id: catransit 4 | is_pii: 'False' 5 | langs: 6 | - common 7 | name: Canadian branch transit number 8 | doc: Datatype for 'Canadian branch transit number' (catransit) from 9 | rules in context 'finances'. 10 | classification: identifier 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/monthday.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: Defines a part of a date - the month and day. Example (MM-DD). 
4 | id: monthday 5 | is_pii: 'False' 6 | links: 7 | - type: other 8 | url: https://ddialliance.org/Specification/DDI-CV/DateType_1.1.html 9 | langs: 10 | - common 11 | name: Month and day 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/environment/waterstation.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - environment 3 | id: waterstation 4 | is_pii: 'False' 5 | langs: 6 | - en 7 | name: Water quality station id 8 | doc: Datatype for 'Water quality station id' (waterstation) from rules 9 | in context 'environment'. 10 | classification: identifier 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/texts/headline.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - common 3 | - texts 4 | doc: Headline of the article. 5 | id: headline 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: schema.org 11 | url: https://schema.org/headline 12 | name: Headline 13 | translations: 14 | ru: 15 | doc: Заголовок статьи 16 | name: Заголовок 17 | examples: [] 18 | wikidata_property: '' 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/any/cryptography/sha256hash.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - cryptography 3 | doc: SHA256 hash from file or data. 
Sometimes used as unique id of the data record 4 | id: sha256hash 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: wikipedia 10 | url: https://en.wikipedia.org/wiki/SHA-2 11 | name: SHA256 hash 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/yearmonth.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: Defines a part of a date - the year and month. Example YYYY-MM. 4 | id: yearmonth 5 | is_pii: 'False' 6 | links: 7 | - type: other 8 | url: https://ddialliance.org/Specification/DDI-CV/DateType_1.1.html 9 | langs: 10 | - common 11 | name: Year and month 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/persons/person_fullname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - persons 3 | - pii 4 | doc: Full name of the person as combination of last, first, and sometimes middle name 5 | id: person_fullname 6 | is_pii: 'True' 7 | langs: 8 | - common 9 | name: Person fullname 10 | patterns: 11 | - rusfullname 12 | - frfullname 13 | links: [] 14 | examples: [] 15 | wikidata_property: '' 16 | translations: {} 17 | regexp: '' 18 | -------------------------------------------------------------------------------- /data/datatypes/any/shipping/tracknum.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - objectids 3 | - shipping 4 | doc: Postal tracking number 5 | id: tracknum 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: wikipedia 11 | url: https://en.wikipedia.org/wiki/Tracking_number 12 | name: Tracking number (postal) 13 | classification: identifier 14 | examples: 
[] 15 | wikidata_property: '' 16 | translations: {} 17 | regexp: '' 18 | -------------------------------------------------------------------------------- /data/datatypes/AU/persons/au_passport.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - AU 6 | doc: Number of Australian National passport 7 | id: aupassport 8 | langs: 9 | - en 10 | links: 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/Australian_passport 13 | name: Australian passport number 14 | semantic_type: passport 15 | examples: [] 16 | wikidata_property: '' 17 | translations: {} 18 | regexp: '' 19 | -------------------------------------------------------------------------------- /data/datatypes/RU/persons/ru_passport.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - RU 6 | doc: Number of Russian foreign passport 7 | id: rupassport 8 | langs: 9 | - ru 10 | links: 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/Russian_passport 13 | name: Russian passport foreign number 14 | semantic_type: passport 15 | examples: [] 16 | wikidata_property: '' 17 | translations: {} 18 | regexp: '' 19 | -------------------------------------------------------------------------------- /data/datatypes/MX/geo/mx_state.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - MX 5 | doc: Mexican state/province by name or code 6 | id: mxstate 7 | is_pii: 'False' 8 | langs: 9 | - es 10 | links: 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/List_of_states_of_Mexico 13 | name: Mexican state (province) 14 | classification: categorical 15 | examples: [] 16 | wikidata_property: '' 17 | translations: {} 18 | regexp: '' 19 | -------------------------------------------------------------------------------- 
/data/datatypes/RU/geo/ru_mosdistricts.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - RU 5 | doc: Moscow city (Russia) districts 6 | id: rumosdistricts 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | name: Moscow districts (Russia) 11 | translations: 12 | ru: 13 | doc: Округа города Москвы 14 | name: Округа города Москвы 15 | classification: categorical 16 | links: [] 17 | examples: [] 18 | wikidata_property: '' 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/FR/persons/fr_nir.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - FR 6 | doc: France's National ID number 7 | id: frnir 8 | is_pii: 'True' 9 | langs: 10 | - fr 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/INSEE_code 14 | name: France National ID number (social number) 15 | classification: identifier 16 | examples: [] 17 | wikidata_property: '' 18 | translations: {} 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/US/persons/us_passport.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - US 6 | doc: Number of US Passport 7 | id: uspassport 8 | is_pii: 'True' 9 | langs: 10 | - en 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/United_States_passport 14 | name: US passport number 15 | semantic_type: passport 16 | regexp: '[0-9]{9}' 17 | examples: [] 18 | wikidata_property: '' 19 | translations: {} 20 | -------------------------------------------------------------------------------- /data/datatypes/any/environment/emissioninventory.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - environment 3 
| id: emissioninventory 4 | is_pii: 'False' 5 | langs: 6 | - en 7 | name: Emission inventory field by name 8 | doc: Datatype for 'Emission inventory field by name' (emissioninventory) from rules 9 | in context 'environment'. 10 | classification: identifier 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/any/geo/en_countryname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | doc: Name of the country in English 4 | id: encountryname 5 | langs: 6 | - common 7 | name: Country name (English) 8 | semantic_type: country 9 | examples: 10 | - value: Afghanistan 11 | description: Afghanistan (country name) 12 | - value: Mexico 13 | description: Mexico (country name) 14 | links: [] 15 | wikidata_property: '' 16 | translations: {} 17 | regexp: '' 18 | -------------------------------------------------------------------------------- /data/datatypes/any/persons/person_surname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - persons 3 | - pii 4 | doc: Last name of the person. For example, Smith or Ivanova 5 | id: person_surname 6 | is_pii: 'True' 7 | langs: 8 | - common 9 | links: 10 | - type: schema.org 11 | url: https://schema.org/familyName 12 | name: Person surname/lastname 13 | patterns: 14 | - russurname 15 | examples: [] 16 | wikidata_property: '' 17 | translations: {} 18 | regexp: '' 19 | -------------------------------------------------------------------------------- /data/datatypes/any/cryptography/md5hash.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - cryptography 3 | doc: Result of the MD5 hash function. 
Commonly used to generate a hash from a file 4 | or data or to use as a unique identifier 5 | id: md5hash 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: wikipedia 11 | url: https://en.wikipedia.org/wiki/MD5 12 | name: MD5 hash 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/any/persons/person_firstname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - persons 3 | - pii 4 | doc: First name of the person. For example, Donald or Vladimir or Jack 5 | id: person_firstname 6 | is_pii: 'True' 7 | langs: 8 | - common 9 | links: 10 | - type: schema.org 11 | url: https://schema.org/givenName 12 | name: Person firstname 13 | patterns: 14 | - rusfirstname 15 | examples: [] 16 | wikidata_property: '' 17 | translations: {} 18 | regexp: '' 19 | -------------------------------------------------------------------------------- /data/datatypes/CA/geo/ca_province copy.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - CA 5 | doc: Provinces and territories of Canada 6 | id: caprovince 7 | is_pii: 'False' 8 | links: 9 | - type: wikipedia 10 | url: https://en.wikipedia.org/wiki/Provinces_and_territories_of_Canada 11 | langs: 12 | - en 13 | name: Province of Canada 14 | classification: categorical 15 | examples: [] 16 | wikidata_property: '' 17 | translations: {} 18 | regexp: '' 19 | -------------------------------------------------------------------------------- /data/datatypes/RU/persons/ru_intpassport.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - RU 6 | doc: Number of Russian internal passport 7 | id: ruintpassport 8 | langs: 9 | - ru 10 | links: 11 | - type: wikipedia 12 | url: 
https://en.wikipedia.org/wiki/Internal_passport_of_Russia 13 | name: Russian internal passport number 14 | semantic_type: passport 15 | examples: [] 16 | wikidata_property: '' 17 | translations: {} 18 | regexp: '' 19 | -------------------------------------------------------------------------------- /data/datatypes/any/persons/nationality.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | doc: Nationality of the person from list of unique code from one of code lists or 5 | defined somehow else. 6 | id: nationality 7 | is_pii: 'True' 8 | langs: 9 | - common 10 | name: Person nationality 11 | classification: categorical 12 | patterns: 13 | - ennationality 14 | links: [] 15 | examples: [] 16 | wikidata_property: '' 17 | translations: {} 18 | regexp: '' 19 | -------------------------------------------------------------------------------- /data/datatypes/any/texts/description.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - common 3 | doc: Item, Object or entity description, common for any object 4 | id: description 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | name: Description 9 | links: 10 | - type: schema.org 11 | url: https://schema.org/description 12 | translations: 13 | ru: 14 | doc: Описание объекта или сущности 15 | name: Описание 16 | examples: [] 17 | wikidata_property: '' 18 | regexp: '' 19 | -------------------------------------------------------------------------------- /data/datatypes/any/science/scopusauthoirid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - persons 3 | - science 4 | doc: identifier for an author assigned in Scopus bibliographic database 5 | id: scopusauthoirid 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: wikidata 11 | url: https://www.wikidata.org/wiki/Property:P1153 12 | name: Scopus author ID 13 | classification: 
identifier 14 | examples: [] 15 | wikidata_property: '' 16 | translations: {} 17 | regexp: '' 18 | -------------------------------------------------------------------------------- /data/datatypes/any/texts/comment.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - common 3 | - texts 4 | doc: Comment/note for the object or entity. 5 | id: comment 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: schema.org 11 | url: https://schema.org/Comment 12 | name: Comment / Note 13 | translations: 14 | ru: 15 | doc: Комментарий или примечание к объекту 16 | name: Комментарий / примечание 17 | examples: [] 18 | wikidata_property: '' 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/GB/persons/uk_passport.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - GB 6 | doc: Number of the passport issued by the United Kingdom 7 | id: ukpassport 8 | is_pii: 'True' 9 | langs: 10 | - en 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/British_passport 14 | name: British passport number 15 | semantic_type: passport 16 | regexp: '[0-9]{9}' 17 | examples: [] 18 | wikidata_property: '' 19 | translations: {} 20 | -------------------------------------------------------------------------------- /data/datatypes/RU/geo/ru_mosadmareas.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - RU 5 | doc: Moscow city (Russia) administrative areas 6 | id: rumosadmareas 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | name: Moscow administrative areas (Russia) 11 | translations: 12 | ru: 13 | doc: Справочник районов города Москвы 14 | name: Районы города Москвы 15 | classification: categorical 16 | links: [] 17 | examples: [] 18 | wikidata_property: '' 19 | regexp: '' 20 | 
-------------------------------------------------------------------------------- /data/datatypes/TH/persons/th_idcard.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - TH 6 | doc: Number of the Thai identity card 7 | id: thidcard 8 | is_pii: 'True' 9 | langs: 10 | - th 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/Thai_identity_card 14 | regexp: ^\b\d{1}-\d{4}-\d{5}-\d{2}-\d\b$ 15 | name: Thai identity card 16 | classification: identifier 17 | examples: [] 18 | wikidata_property: '' 19 | translations: {} 20 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/year.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: A calendar year in western notation like 2022 or 1913 4 | id: year 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: wikipedia 10 | url: https://en.wikipedia.org/wiki/Year 11 | name: Year 12 | regexp: \d{4} 13 | examples: 14 | - value: '2022' 15 | description: Year 2022 16 | - value: '1243' 17 | description: Year 1243 18 | wikidata_property: '' 19 | translations: {} 20 | -------------------------------------------------------------------------------- /data/datatypes/CA/persons/ca_passport.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - CA 6 | doc: Number of Canadian passport 7 | id: capassport 8 | is_pii: 'True' 9 | langs: 10 | - en 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/Canadian_passport 14 | name: Canadian passport number 15 | semantic_type: passport 16 | classification: identifier 17 | examples: [] 18 | wikidata_property: '' 19 | translations: {} 20 | regexp: '' 21 | -------------------------------------------------------------------------------- 
/data/datatypes/any/identifiers/guid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - identifiers 3 | doc: A universally unique identifier (UUID) is a 128-bit label used for information 4 | in computer systems. The term globally unique identifier (GUID) is also used. 5 | id: guid 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | name: GUID (Globally unique identifier) 10 | classification: identifier 11 | links: [] 12 | examples: [] 13 | wikidata_property: '' 14 | translations: {} 15 | regexp: '' 16 | -------------------------------------------------------------------------------- /data/datatypes/ES/persons/es_driver_license.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - ES 6 | doc: Alphanumeric value of the Spain driver license 7 | id: esdriverlic 8 | is_pii: 'True' 9 | langs: 10 | - es 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/Driving_licence_in_Spain 14 | name: Spain driver license number 15 | classification: identifier 16 | examples: [] 17 | wikidata_property: '' 18 | translations: {} 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/dayofweek.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: Week day name 4 | id: dayofweek 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: wikipedia 10 | url: https://en.wikipedia.org/wiki/Workweek_and_weekend 11 | name: Day of week 12 | patterns: 13 | - endayofweek 14 | - rusdayofweek 15 | translations: 16 | ru: 17 | doc: Название дня недели 18 | name: День недели 19 | examples: [] 20 | wikidata_property: '' 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/deathday.yaml: 
-------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - datetime 4 | doc: Date of death of the person 5 | id: deathday 6 | is_pii: 'True' 7 | langs: 8 | - common 9 | links: 10 | - type: wikipedia 11 | url: https://en.wikipedia.org/wiki/Death_anniversary 12 | name: Date of death 13 | parent: 14 | type: date 15 | translations: 16 | ru: 17 | doc: Дата смерти человека 18 | name: Дата смерти 19 | examples: [] 20 | wikidata_property: '' 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/datatypes/any/dublincore/dclanguage.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - dublincore 3 | doc: A language of the resource. 4 | id: dclanguage 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: dublincore 10 | url: http://purl.org/dc/terms/language 11 | name: Language (Dublin Core) 12 | examples: 13 | - value: eng 14 | description: English language 15 | translations: 16 | ru: 17 | doc: Язык ресурса 18 | name: Язык 19 | wikidata_property: '' 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/any/geo/iso3166code.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | doc: identifier for a country subdivision per ISO 3166-2 (include country code) 4 | id: iso3166code 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: wikidata 10 | url: https://www.wikidata.org/wiki/Property:P300 11 | name: identifier for a country subdivision per ISO 3166-2 12 | classification: categorical 13 | examples: [] 14 | wikidata_property: '' 15 | translations: {} 16 | regexp: '' 17 | -------------------------------------------------------------------------------- /data/datatypes/DE/persons/de_driver_license.yaml: 
-------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - DE 6 | doc: Alphanumeric value of the German driver license 7 | id: dedriverlic 8 | is_pii: 'True' 9 | langs: 10 | - de 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/Driving_licence_in_Germany 14 | name: Germany driver license number 15 | classification: identifier 16 | examples: [] 17 | wikidata_property: '' 18 | translations: {} 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/RU/finances/ru_okvalpha.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | country: 4 | - RU 5 | doc: Russian currency codes (OKV classifier) 6 | id: okvalpha 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | name: Russian currency code 11 | translations: 12 | ru: 13 | doc: Код валюты по справочнику ОКВ (Общероссийский классификатор валют) 14 | name: Код валюты по ОКВ 15 | semantic_type: currency 16 | links: [] 17 | examples: [] 18 | wikidata_property: '' 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/SE/persons/se_passport.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - SE 6 | doc: Number of Sweden passport, 8-digits 7 | id: sepassport 8 | is_pii: 'True' 9 | langs: 10 | - se 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/Swedish_passport 14 | regexp: ^[0-9]{8}$ 15 | name: Sweden passport number 16 | semantic_type: passport 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/datatypes/any/finances/iso4217curname.yaml: 
-------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | doc: ISO 4217 Currency name 4 | id: iso4217curname 5 | langs: 6 | - common 7 | links: 8 | - type: other 9 | url: https://www.iso.org/iso-4217-currency-codes.html 10 | name: ISO 4217 Currency name 11 | semantic_type: currency 12 | examples: 13 | - value: palladium 14 | description: palladium 15 | - value: euro 16 | description: euro 17 | wikidata_property: '' 18 | translations: {} 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/ES/persons/es_passport.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - ES 6 | doc: Number of Spanish passport 7 | id: espassport 8 | is_pii: 'True' 9 | langs: 10 | - es 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/Spanish_passport 14 | regexp: ^[A-z0-9]{2,3}[0-9]{6}$ 15 | name: Spanish passport number 16 | semantic_type: passport 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/datatypes/FR/persons/fr_passport.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - FR 6 | doc: Number of French passport 7 | id: frpassport 8 | is_pii: 'True' 9 | langs: 10 | - fr 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/French_passport 14 | regexp: ^[0-9]{2}[A-z]{2}[0-9]{5}$ 15 | name: French passport number 16 | semantic_type: passport 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/datatypes/RU/finances/ru_okvname.yaml: 
-------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | country: 4 | - RU 5 | doc: Russian currency names (OKV classifier) 6 | id: okvname 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | name: Russian currency name 11 | translations: 12 | ru: 13 | doc: Название валюты по справочнику ОКВ (Общероссийский классификатор валют) 14 | name: Название валюты по ОКВ 15 | semantic_type: currency 16 | links: [] 17 | examples: [] 18 | wikidata_property: '' 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/any/common/name.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - common 3 | doc: Name of the item, object or entity. 4 | id: name 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | name: Name 9 | links: 10 | - type: schema.org 11 | url: https://schema.org/name 12 | - type: wikidata 13 | url: https://www.wikidata.org/wiki/Property:P2561 14 | wikidata_property: P2561 15 | translations: 16 | ru: 17 | doc: Название объекта или сущности 18 | name: Название 19 | examples: [] 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/quarter.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: A calendar quarter of the year 4 | id: quarter 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: wikipedia 10 | url: https://en.wikipedia.org/wiki/Calendar_year#Quarters 11 | name: Quarter of the year 12 | regexp: '[1234]' 13 | examples: 14 | - value: '1' 15 | description: First quarter 16 | - value: '2' 17 | description: Second quarter 18 | wikidata_property: '' 19 | translations: {} 20 | -------------------------------------------------------------------------------- /data/datatypes/any/software/programminglang.yaml: 
-------------------------------------------------------------------------------- 1 | categories: 2 | - software 3 | doc: Programming language 4 | id: programminglang 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: wikidata 10 | url: https://www.wikidata.org/wiki/Property:P277 11 | - type: schema.org 12 | url: https://schema.org/programmingLanguage 13 | name: Programming language 14 | wikidata_property: P277 15 | examples: 16 | - value: C 17 | description: Linux 18 | translations: {} 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/any/transport/air/airlinecode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - transport 3 | doc: Unique code of the airline provided by IATA or ICAO 4 | id: airlinecode 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: wikipedia 10 | url: https://en.wikipedia.org/wiki/Airline_codes 11 | name: Airline code 12 | patterns: 13 | - icaoairlinecode 14 | - iataairlinecode 15 | classification: identifier 16 | examples: [] 17 | wikidata_property: '' 18 | translations: {} 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/RU/finances/ru_sbankaccount.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | country: 4 | - RU 5 | doc: A bank account is a financial account maintained by a bank or other financial 6 | institution in which the financial transactions between the bank and a customer 7 | are recorded. 
8 | id: rusbankaccount 9 | langs: 10 | - ru 11 | name: Bank account in Russia 12 | semantic_type: bankaccount 13 | links: [] 14 | examples: [] 15 | wikidata_property: '' 16 | translations: {} 17 | regexp: '' 18 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/date.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: Date with one of known notations 4 | id: date 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: wikipedia 10 | url: https://en.wikipedia.org/wiki/Date_and_time_notation 11 | - type: schema.org 12 | url: https://schema.org/Date 13 | name: Date 14 | translations: 15 | ru: 16 | doc: Дата в одной из общепринятых нотаций 17 | name: Дата 18 | examples: [] 19 | wikidata_property: '' 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/timezone.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: A time zone is an area that observes a uniform standard time for legal, commercial 4 | and social purposes 5 | id: timezone 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: wikipedia 11 | url: https://en.wikipedia.org/wiki/Time_zone 12 | name: Time zone 13 | patterns: 14 | - ianatimezoneid 15 | - utctimezoneoffset 16 | examples: [] 17 | wikidata_property: '' 18 | translations: {} 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/any/dublincore/dctitle.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - dublincore 3 | doc: A name given to the resource. 
4 | id: dctitle 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: dublincore 10 | url: http://purl.org/dc/terms/title 11 | name: Title (Dublin Core) 12 | examples: 13 | - value: Picture of the cat 14 | description: Title of the resource 15 | translations: 16 | ru: 17 | doc: Имя данное ресурсу 18 | name: Заголовок 19 | wikidata_property: '' 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/CA/persons/ca_on_ohip.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - CA 6 | doc: Canada Ontario Health Insurance Plan (OHIP) number 7 | id: caonohip 8 | is_pii: 'True' 9 | langs: 10 | - en 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/Ontario_Health_Insurance_Plan 14 | name: Canada Ontario Health Insurance Plan (OHIP) number 15 | classification: identifier 16 | examples: [] 17 | wikidata_property: '' 18 | translations: {} 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/RU/industry/ru_okved.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - common 3 | country: 4 | - RU 5 | doc: Russian economic classification code (OKVED) 6 | id: okved 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | name: Russian economic classification code (OKVED) 11 | translations: 12 | ru: 13 | doc: Код по общероссийскому классификатору видов экономической деятельности 14 | name: Код ОКВЭД 15 | classification: categorical 16 | links: [] 17 | examples: [] 18 | wikidata_property: '' 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/any/software/filesize.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - software 3 | doc: size of a file 4 | id: filesize 5 | 
is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: wikidata 10 | url: https://www.wikidata.org/wiki/Property:P3575 11 | - type: schema.org 12 | url: https://schema.org/fileSize 13 | name: File size 14 | parent: 15 | type: datasize 16 | wikidata_property: P3575 17 | examples: 18 | - value: 839 megabyte 19 | description: FreeCAD 20 | translations: {} 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/datatypes/any/useraccounts/twitter_username.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | - useraccounts 5 | doc: Person username in Twitter 6 | id: twitter_username 7 | is_pii: 'True' 8 | parent: 9 | type: username 10 | langs: 11 | - common 12 | wikidata_property: P2002 13 | links: 14 | - type: wikidata 15 | url: https://www.wikidata.org/wiki/Property:P2002 16 | name: Twitter username 17 | regexp: '[0-9A-Za-z_]{1,20}' 18 | classification: identifier 19 | examples: [] 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/datatypes/CA/geo/ca_province.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - CA 5 | doc: Provinces and territories of Canada alpha2 code 6 | id: caprovincecode 7 | is_pii: 'False' 8 | links: 9 | - type: wikipedia 10 | url: https://en.wikipedia.org/wiki/Provinces_and_territories_of_Canada 11 | langs: 12 | - en 13 | regexp: (NL|PE|NS|NB|QC|ON|MB|SK|AB|BC|YT|NT|NU) 14 | name: Province of Canada alpha2 code 15 | semantic_type: caprovince 16 | examples: [] 17 | wikidata_property: '' 18 | translations: {} 19 | -------------------------------------------------------------------------------- /data/datatypes/CA/persons/ca_driver_license.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | 
country: 5 | - CA 6 | doc: Alphanumeric value of the Canada driver license 7 | id: cadriverlic 8 | is_pii: 'True' 9 | langs: 10 | - en 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/Driver%27s_licences_in_Canada 14 | regexp: ^[A-Z](?:\d[- ]*){14}$ 15 | name: Canada driver license number 16 | classification: identifier 17 | examples: [] 18 | wikidata_property: '' 19 | translations: {} 20 | -------------------------------------------------------------------------------- /data/datatypes/MX/geo/mx_iso3166_2.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - MX 5 | doc: ISO 3166-2:MX 3-letters alphanumeric codes used to identify Mexican states. 6 | id: iso3166_mx3a 7 | is_pii: 'False' 8 | langs: 9 | - es 10 | links: 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/Template:Mexico_State-Abbreviation_Codes 13 | name: ISO 3166-2:MX (3-letters code) 14 | classification: categorical 15 | examples: [] 16 | wikidata_property: '' 17 | translations: {} 18 | regexp: '' 19 | -------------------------------------------------------------------------------- /data/datatypes/RU/industry/ru_okpd.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - common 3 | country: 4 | - RU 5 | doc: Russian product code (OKPD) used for government procurement and budget planning 6 | purposes 7 | id: okpd 8 | is_pii: 'False' 9 | langs: 10 | - ru 11 | name: Russian product code (OKPD) 12 | translations: 13 | ru: 14 | doc: Код по Общероссийскому классификатору продукции 15 | name: Код ОКПД 16 | classification: categorical 17 | links: [] 18 | examples: [] 19 | wikidata_property: '' 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/any/common/hexcolor.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - common 
3 | doc: Color of subject 4 | id: hexcolor 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | name: sRGB color hex triplet (hex color) 9 | links: 10 | - type: wikidata 11 | url: https://www.wikidata.org/wiki/Property:P465 12 | wikidata_property: P465 13 | translations: 14 | ru: 15 | doc: Цвет sRGB в шестнадцатеричной кодировке 16 | name: Цвет в шестнадцатеричной кодировке 17 | semantic_type: color 18 | examples: [] 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/any/useraccounts/skype_username.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | - useraccounts 5 | doc: Person username in Skype 6 | id: skype_username 7 | is_pii: 'True' 8 | parent: 9 | type: username 10 | langs: 11 | - common 12 | name: Skype username 13 | regexp: '[a-zA-Z][a-zA-Z0-9_\-\,\.]{5,31}' 14 | wikidata_property: P2893 15 | links: 16 | - type: wikidata 17 | url: https://www.wikidata.org/wiki/Property:P2893 18 | classification: identifier 19 | examples: [] 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/datatypes/RU/medical/ru_medicinetradename.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - medical 3 | country: 4 | - RU 5 | doc: Trade name of the medicine in Russian. 
6 | id: rumedicinetradename 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | name: Medicine trade name (Russian) 11 | translations: 12 | ru: 13 | doc: Торговое название для лекарственных средств 14 | name: Торговое наименование лекарственного средства 15 | classification: identifier 16 | links: [] 17 | examples: [] 18 | wikidata_property: '' 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/any/common/color.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - common 3 | doc: Color of subject 4 | id: color 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | name: Color 9 | links: 10 | - type: wikidata 11 | url: https://www.wikidata.org/wiki/Property:P462 12 | - type: schema.org 13 | url: https://schema.org/color 14 | wikidata_property: P462 15 | translations: 16 | ru: 17 | doc: Цвет как физическая характеристика объекта 18 | name: Цвет 19 | classification: categorical 20 | examples: [] 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/datatypes/any/companies/iso6523.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - companies 3 | doc: An organization identifier as defined in ISO 6523(-1) 4 | is_pii: 'False' 5 | langs: 6 | - common 7 | links: 8 | - type: schema.org 9 | url: https://schema.org/iso6523Code 10 | - type: wikipedia 11 | url: https://en.wikipedia.org/wiki/ISO/IEC_6523 12 | id: iso6523code 13 | name: ISO 6523 organization identifier 14 | classification: identifier 15 | examples: [] 16 | wikidata_property: '' 17 | translations: {} 18 | regexp: '' 19 | -------------------------------------------------------------------------------- /data/datatypes/any/internet/eui48.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - internet 3 | - pii 4 | doc: A media access control address 
(MAC address) is a unique identifier assigned 5 | to a network interface controller (NIC) for use as a network address in communications 6 | within a network segment. 7 | id: eui48 8 | is_pii: 'True' 9 | langs: 10 | - common 11 | name: MAC Address (EUI48) of the device 12 | classification: identifier 13 | links: [] 14 | examples: [] 15 | wikidata_property: '' 16 | translations: {} 17 | regexp: '' 18 | -------------------------------------------------------------------------------- /data/datatypes/FR/geo/fr_epcicode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - FR 5 | doc: Etablissements publics de cooperation intercommunale (EPCI). French administrative 6 | geo code 7 | id: epcicode 8 | is_pii: 'False' 9 | langs: 10 | - fr 11 | links: 12 | - type: other 13 | url: https://www.insee.fr/fr/information/2510634 14 | name: Etablissements publics de cooperation intercommunale (EPCI) 15 | classification: identifier 16 | examples: [] 17 | wikidata_property: '' 18 | translations: {} 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/RU/geo/ru_feddistrict.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - RU 5 | doc: Name of the federal district of Russia. Full or short. 
6 | id: rusfeddistrict 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | name: Russian federal district name 11 | translations: 12 | ru: 13 | doc: Наименование федерального округа России полное или сокращённое 14 | name: Наименование федерального округа России 15 | classification: categorical 16 | links: [] 17 | examples: [] 18 | wikidata_property: '' 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/RU/government/ru_csrcode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | country: 4 | - RU 5 | doc: Russian budget target item of expenditure code 6 | id: csrcode 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | name: Russian budget target item of expenditure code 11 | translations: 12 | ru: 13 | doc: Целевая статья расходов российского бюджета 14 | name: Целевая статья расходов российского бюджета 15 | classification: categorical 16 | links: [] 17 | examples: [] 18 | wikidata_property: '' 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/any/internet/ipv6subnet.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - internet 3 | doc: Range of IPv6 addresses 4 | id: ipv6subnet 5 | is_pii: 'True' 6 | langs: 7 | - common 8 | links: 9 | - type: wikidata 10 | url: https://www.wikidata.org/wiki/Property:P3793 11 | name: IPv6 routing prefix (subnet) 12 | wikidata_property: P3793 13 | regexp: '[\d:a-f\/]{4,30}' 14 | examples: 15 | - value: 2001:610::/29 16 | description: SURFNet 17 | - value: 2001:630:440::/44 18 | description: University of Oxford 19 | translations: {} 20 | -------------------------------------------------------------------------------- /data/datatypes/any/science/researcherid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - science 3 | - persons 4 
| doc: 'ResearcherID is an identifying system for scientific authors. The system was 5 | introduced in January 2008 by Thomson Reuters. ' 6 | id: researcherid 7 | is_pii: 'False' 8 | langs: 9 | - common 10 | links: 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/ResearcherID 13 | name: ResearcherID 14 | classification: identifier 15 | examples: [] 16 | wikidata_property: '' 17 | translations: {} 18 | regexp: '' 19 | -------------------------------------------------------------------------------- /data/datatypes/GB/persons/uk_utr.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - GB 6 | doc: A UTR (unique taxpayer reference) number is a 10-digit number completely unique 7 | to each and every UK taxpayer. 8 | id: ukutr 9 | is_pii: 'True' 10 | langs: 11 | - en 12 | links: 13 | - type: other 14 | url: https://www.gov.uk/find-lost-utr-number 15 | name: Unique Taxpayer Reference (UTR) 16 | regexp: '[0-9]{10}' 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/datatypes/any/dublincore/dcabstract.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - dublincore 3 | doc: A summary of the resource. 4 | id: dcabstract 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: dublincore 10 | url: http://purl.org/dc/terms/abstract 11 | name: Abstract (Dublin Core) 12 | examples: 13 | - value: A summary of the text. 14 | description: A summary of the text. 15 | translations: 16 | ru: 17 | doc: Краткое изложение ресурса. 
18 | name: Краткое изложение 19 | wikidata_property: '' 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/tools/pii/piicatcher.yaml: -------------------------------------------------------------------------------- 1 | id: piicatcher 2 | category: pii 3 | name: PII Catcher 4 | doc: 'PIICatcher is a scanner for PII and PHI information. It finds PII data in your databases and file systems and tracks critical data.' 5 | website: https://github.com/tokern/piicatcher 6 | supported_types: 7 | - email 8 | - birthday 9 | - gender 10 | - nationality 11 | - person_fullname 12 | - person_firstname 13 | - person_lastname 14 | - username 15 | - password 16 | - address 17 | - usssn 18 | -------------------------------------------------------------------------------- /.idea/metacrafter-registry.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /data/datatypes/AU/finances/au_bsb.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | doc: A bank state branch is the name used in Australia for a bank code, which is a 4 | branch identifier. 
5 | id: aubsb 6 | is_pii: 'False' 7 | langs: 8 | - en 9 | country: 10 | - AU 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/Bank_state_branch 14 | regexp: ^[0-9]{3}-?[0-9]{3}$ 15 | name: Australia bank state branch (BSB) code 16 | classification: identifier 17 | examples: [] 18 | wikidata_property: '' 19 | translations: {} 20 | -------------------------------------------------------------------------------- /data/datatypes/any/internet/asn.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - internet 3 | doc: autonomous system number 4 | id: asn 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: wikidata 10 | url: https://www.wikidata.org/wiki/Property:P3797 11 | name: Autonomous system number (ASN) 12 | wikidata_property: P3797 13 | regexp: '[1-9]\d*' 14 | examples: 15 | - value: '2532' 16 | description: Library of Congress 17 | - value: '174' 18 | description: Cogent Communications 19 | classification: identifier 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/datatypes/any/persons/en_nationality.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | doc: Nationality of the person written in English 5 | id: ennationality 6 | is_pii: 'True' 7 | langs: 8 | - common 9 | name: Person nationality in English 10 | classification: categorical 11 | semantic_type: nationality 12 | examples: 13 | - value: Belorussian 14 | description: Belorussian nationality 15 | - value: Spanish 16 | description: Spanish nationality 17 | links: [] 18 | wikidata_property: '' 19 | translations: {} 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/any/persons/passport.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - 
persons 4 | doc: Person passport, usually unique personal information that should not be shared online. 5 | Identifies personal document issued to person by government authority 6 | id: passport 7 | is_pii: 'True' 8 | langs: 9 | - common 10 | name: Person passport number (universal) 11 | patterns: 12 | - ruintpassport 13 | - uspassport 14 | classification: identifier 15 | links: [] 16 | examples: [] 17 | wikidata_property: '' 18 | translations: {} 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/any/software/imphash.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - software 3 | doc: hash based on PE file imports (MD5 hash) 4 | id: imphash 5 | is_pii: 'False' 6 | parent: 7 | type: md5hash 8 | langs: 9 | - common 10 | links: 11 | - type: other 12 | url: https://developers.virustotal.com/reference/pe_info 13 | name: imphash 14 | examples: 15 | - value: 316cd668ed705c998eae8d3bd7bd168f 16 | description: Virus total file Twain_32.dll 17 | classification: identifier 18 | wikidata_property: '' 19 | translations: {} 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/RU/government/ru_npa.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | country: 4 | - RU 5 | doc: 'Russian regulatory legal act. For example: Executive order' 6 | id: runpa 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | name: Russian regulatory legal act 11 | parent: 12 | type: legislationname 13 | translations: 14 | ru: 15 | doc: Нормативно-правовой документ в России. 
Например, постановление правительства 16 | name: Нормативно-правовой документ 17 | links: [] 18 | examples: [] 19 | wikidata_property: '' 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/any/software/datasize.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - software 3 | doc: size of a software, dataset, neural network, or individual file 4 | id: datasize 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: wikidata 10 | url: https://www.wikidata.org/wiki/Property:P3575 11 | name: Data size 12 | wikidata_property: P3575 13 | examples: 14 | - value: 839 megabyte 15 | description: FreeCAD 16 | - value: 5.15 gigabyte 17 | description: Sly Cooper and the Thievius Raccoonus 18 | translations: {} 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/any/useraccounts/github_username.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | - useraccounts 5 | doc: Person/organization username in Github 6 | id: github_username 7 | is_pii: 'True' 8 | parent: 9 | type: username 10 | langs: 11 | - common 12 | wikidata_property: P2037 13 | links: 14 | - type: wikidata 15 | url: https://www.wikidata.org/wiki/Property:P2037 16 | name: Github username 17 | regexp: '[0-9A-Za-z]([0-9A-Za-z\-]{0,37}[0-9A-Za-z])?' 
18 | classification: identifier 19 | examples: [] 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/datatypes/ES/persons/es_nif_number.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - ES 6 | doc: Spanish tax identification number used to identify tax paying individuals 7 | id: esnif 8 | is_pii: 'True' 9 | langs: 10 | - es 11 | links: 12 | - type: wikipedia 13 | url: https://es.wikipedia.org/wiki/N%C3%BAmero_de_identificaci%C3%B3n_fiscal 14 | regexp: '[0-9]?[0-9]{7}[-]?[A-Z]' 15 | name: Spanish Tax identification number 16 | classification: identifier 17 | examples: [] 18 | wikidata_property: '' 19 | translations: {} 20 | -------------------------------------------------------------------------------- /data/datatypes/RU/companies/ru_okogu.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - common 3 | country: 4 | - RU 5 | doc: Russian government and governance code. 
OKOGU classifier 6 | id: okogu 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | name: Russian government and governance code (OKOGU) 11 | translations: 12 | ru: 13 | doc: ОКОГУ - это общероссийский классификатор органов государственной власти 14 | и управления 15 | name: Код ОКОГУ 16 | classification: categorical 17 | links: [] 18 | examples: [] 19 | wikidata_property: '' 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/any/geo/geonamesid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | doc: identifier in the GeoNames geographical database 4 | id: geonamesid 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: wikidata 10 | url: https://www.wikidata.org/wiki/Property:P1566 11 | name: Geonames ID 12 | wikidata_property: P1566 13 | regexp: '[1-9][0-9]{0,8}' 14 | examples: 15 | - value: '935877' 16 | description: Piton de la Fournaise 17 | - value: '1277082' 18 | description: Baranagar 19 | classification: identifier 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/datatypes/any/internet/ipv4.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - internet 3 | - pii 4 | doc: Internet Protocol version 4 (IPv4) is the fourth version of the Internet Protocol 5 | (IP). It is one of the core protocols of standards-based internetworking methods 6 | in the Internet and other packet-switched networks. 
7 | id: ipv4 8 | is_pii: 'True' 9 | langs: 10 | - common 11 | name: Internet Protocol version 4 (IPv4) 12 | classification: identifier 13 | links: [] 14 | examples: [] 15 | wikidata_property: '' 16 | translations: {} 17 | regexp: '' 18 | -------------------------------------------------------------------------------- /data/tools/other/soda.yaml: -------------------------------------------------------------------------------- 1 | id: soda 2 | category: other 3 | name: Soda 4 | doc: 'Soda allows everyone on your data team to find, analyze, and resolve data issues. Our open-source tools and data observability platform bring everyone closer to the data, resulting in data products that you can trust.' 5 | website: https://docs.soda.io/soda-sql/sql_metrics.html#valid-format-values 6 | supported_types: 7 | - date 8 | - email 9 | - ipv4 10 | - ipv6 11 | - ipaddr 12 | - time 13 | - phone 14 | - uuid 15 | 16 | -------------------------------------------------------------------------------- /data/datatypes/ES/geo/es_postcode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - ES 5 | - MX 6 | doc: Spanish postal code (by language) 7 | id: espostcode 8 | langs: 9 | - es 10 | links: 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/Postal_codes_in_Spain 13 | name: Spanish postal code (by language) 14 | semantic_type: postindex 15 | regexp: \d{5} 16 | examples: 17 | - value: '07002' 18 | description: Palma, Majorca 19 | - value: '27722' 20 | description: Veigas, Asturias 21 | wikidata_property: '' 22 | translations: {} 23 | -------------------------------------------------------------------------------- /data/datatypes/RU/medical/ru_medicineregnum.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - medical 3 | country: 4 | - RU 5 | doc: Russian medicine registration code 6 | id: rumedicineregnum 7 | is_pii: 'False' 8 | langs: 9 | - 
ru 10 | name: Registration code of medicine (Russian) 11 | translations: 12 | ru: 13 | doc: Код регистрации лекарственного средства в России. Реестр Росздравнадзора 14 | name: Код регистрации лекарственного средства в России 15 | classification: identifier 16 | links: [] 17 | examples: [] 18 | wikidata_property: '' 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/datetime.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: Date and time with one of known notations 4 | id: datetime 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: wikipedia 10 | url: https://en.wikipedia.org/wiki/Date_and_time_notation 11 | - type: schema.org 12 | url: https://schema.org/DateTime 13 | name: Date and time 14 | translations: 15 | ru: 16 | doc: Дата и время в одной из общепринятых нотаций 17 | name: Дата и время 18 | examples: [] 19 | wikidata_property: '' 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/any/software/telfhash.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - software 3 | doc: Files Trend Micro ELF Hash (aka telfhash) 4 | id: telfhash 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: other 10 | url: https://developers.virustotal.com/reference/file-telfhash 11 | name: telfhash 12 | examples: 13 | - value: t167319f0a1c160d81cb547cbc383bfae309821ae56faaa74dba48b425b7f51c1903f5f5 14 | description: Virus total file libmongocrypt.so 15 | classification: identifier 16 | wikidata_property: '' 17 | translations: {} 18 | regexp: '' 19 | -------------------------------------------------------------------------------- /data/datatypes/any/useraccounts/instagram_username.yaml: -------------------------------------------------------------------------------- 1 | 
categories: 2 | - pii 3 | - persons 4 | - useraccounts 5 | doc: Person username in Instagram 6 | id: instagram_username 7 | is_pii: 'True' 8 | parent: 9 | type: username 10 | langs: 11 | - common 12 | wikidata_property: P2003 13 | links: 14 | - type: wikidata 15 | url: https://www.wikidata.org/wiki/Property:P2003 16 | name: Instagram username 17 | regexp: ([0-9a-z_](?:(?:[0-9a-z_]|(?:\.(?!\.))){0,28}(?:[0-9a-z_]))?) 18 | classification: identifier 19 | examples: [] 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/datatypes/CA/persons/ca_sin.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - CA 6 | doc: 'Social insurance number (SIN) is a number issued in Canada to administer various 7 | government programs. ' 8 | id: casin 9 | is_pii: 'True' 10 | langs: 11 | - en 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/Social_insurance_number 15 | regexp: (\d{3}-\d{3}-\d{3})|(\d{9}) 16 | name: Canada social insurance number (SIN) 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/datatypes/RU/companies/ru_okopfname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - companies 3 | country: 4 | - RU 5 | doc: Type of business entity in Russia, classifier OKOPF 6 | id: okopfname 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | name: Type of business entity (Russian, OKOPF) 11 | translations: 12 | ru: 13 | doc: Наименование организационно-правовой формы юридического лица по справочнику 14 | ОКОПФ 15 | name: Организационно-правовая форма 16 | classification: categorical 17 | links: [] 18 | examples: [] 19 | wikidata_property: '' 20 | regexp: '' 21 | 
-------------------------------------------------------------------------------- /data/datatypes/any/identifiers/mongodbid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - common 3 | doc: MongoDB unique id assigned to each row of each collection in MongoDB as field 4 | _id 5 | id: mongodbid 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | name: MongoDB unique Object ID 10 | examples: 11 | - value: 507f1f77bcf86cd799439011 12 | description: Unique MongoDB ObjectID 13 | - value: 507f191e810c19729de860ea 14 | description: Unique MongoDB ObjectID 15 | classification: identifier 16 | links: [] 17 | wikidata_property: '' 18 | translations: {} 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/FR/geo/fr_postcode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - FR 5 | doc: French postal code 6 | id: frpostcode 7 | langs: 8 | - fr 9 | links: 10 | - type: wikipedia 11 | url: https://en.wikipedia.org/wiki/Postal_codes_in_France 12 | - type: other 13 | url: https://www.wikidata.org/wiki/Q1105640 14 | name: French postal code 15 | semantic_type: postindex 16 | regexp: \d{5} 17 | examples: 18 | - value: '75008' 19 | description: Paris 20 | - value: '97439' 21 | description: Sainte-Rose 22 | wikidata_property: '' 23 | translations: {} 24 | -------------------------------------------------------------------------------- /data/datatypes/any/finances/iso4217curcode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | doc: ISO 4217 Currency code 4 | id: iso4217curcode 5 | langs: 6 | - common 7 | links: 8 | - type: other 9 | url: https://www.iso.org/iso-4217-currency-codes.html 10 | - type: wikidata 11 | url: https://www.wikidata.org/wiki/Property:P498 12 | name: ISO 4217 Currency code 13 | semantic_type: 
currency 14 | wikidata_property: P498 15 | regexp: '[A-Z]{3}' 16 | examples: 17 | - value: XPD 18 | description: palladium 19 | - value: EUR 20 | description: euro 21 | translations: {} 22 | -------------------------------------------------------------------------------- /data/datatypes/any/telecom/imsi.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - telecom 3 | - pii 4 | doc: The international mobile subscriber identity (IMSI) is a number that uniquely 5 | identifies every user of a cellular network. 6 | id: imsi 7 | is_pii: 'True' 8 | links: 9 | - type: wikipedia 10 | url: https://en.wikipedia.org/wiki/International_mobile_subscriber_identity 11 | langs: 12 | - common 13 | name: The international mobile subscriber identity (IMSI) 14 | classification: identifier 15 | examples: [] 16 | wikidata_property: '' 17 | translations: {} 18 | regexp: '' 19 | -------------------------------------------------------------------------------- /data/tools/other/auctus.yaml: -------------------------------------------------------------------------------- 1 | id: auctus 2 | category: other 3 | name: Auctus 4 | doc: 'This project is a web crawler and search engine for datasets, specifically meant for data augmentation tasks in machine learning. It is able to find datasets in different repositories and index them for later retrieval.' 
5 | website: https://gitlab.com/ViDA-NYU/auctus/auctus 6 | supported_types: 7 | - datetime 8 | - id 9 | - latitude 10 | - longitude 11 | - geopoint 12 | - address 13 | - url 14 | - filename 15 | - boolean 16 | 17 | -------------------------------------------------------------------------------- /data/datatypes/GB/geo/uk_postalcode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - GB 5 | doc: Postal codes used in the United Kingdom, British Overseas Territories and Crown 6 | dependencies are known as postcodes (originally, postal codes). 7 | id: ukpostalcode 8 | langs: 9 | - en 10 | links: 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/Postcodes_in_the_United_Kingdom 13 | name: UK Postal code 14 | semantic_type: postindex 15 | classification: categorical 16 | examples: [] 17 | wikidata_property: '' 18 | translations: {} 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/RU/companies/ru_ikuiko.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | - companies 4 | country: 5 | - RU 6 | doc: Unique code assigned to all government customers in Russian Federation 7 | id: ruikuiko 8 | is_pii: 'False' 9 | langs: 10 | - ru 11 | name: Russian government customers unique code 12 | translations: 13 | ru: 14 | doc: Код присваиваемый государственным заказчикам в Российской Федерации 15 | name: Идентификационный код заказчика 16 | classification: identifier 17 | links: [] 18 | examples: [] 19 | wikidata_property: '' 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/RU/companies/ru_ikz.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | - companies 4 | country: 5 | - RU 6 | doc: Unique id assigned to all 
government procurement procedures in Russian Federation 7 | id: ruikz 8 | is_pii: 'False' 9 | langs: 10 | - ru 11 | name: Russian government procurement unique id 12 | translations: 13 | ru: 14 | doc: Код присваиваемый государственным закупкам в Российской Федерации 15 | name: Идентификационный код закупки 16 | classification: identifier 17 | links: [] 18 | examples: [] 19 | wikidata_property: '' 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/any/cryptocurrency/wifaddrcomp.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - cryptocurrency 3 | id: wifaddrcomp 4 | doc: WIF, compressed pubkey 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: other 10 | url: https://allprivatekeys.com/bitcoin-address-format 11 | name: WIF, compressed pubkey 12 | examples: 13 | - value: L1aW4aubDFB7yfras2S1mN3bqg9nwySY8nkoLmJebSLD5BWv3ENZ 14 | description: Example from https://allprivatekeys.com/bitcoin-address-format 15 | classification: identifier 16 | wikidata_property: '' 17 | translations: {} 18 | regexp: '' 19 | -------------------------------------------------------------------------------- /data/datatypes/any/internet/ipv6.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - internet 3 | - pii 4 | doc: Internet Protocol version 6 (IPv6) is the most recent version of the Internet 5 | Protocol (IP), the communications protocol that provides an identification and location 6 | system for computers on networks and routes traffic across the Internet. 
7 | id: ipv6 8 | is_pii: 'True' 9 | langs: 10 | - common 11 | name: Internet Protocol version 6 (IPv6) 12 | classification: identifier 13 | links: [] 14 | examples: [] 15 | wikidata_property: '' 16 | translations: {} 17 | regexp: '' 18 | -------------------------------------------------------------------------------- /data/datatypes/RU/companies/ru_okopf.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - companies 3 | - common 4 | country: 5 | - RU 6 | doc: Code of type of business entity in Russia, classifier OKOPF 7 | id: okopf 8 | is_pii: 'False' 9 | langs: 10 | - ru 11 | name: Code of type of business entity (Russian, OKOPF) 12 | translations: 13 | ru: 14 | doc: Код организационно-правовой формы юридического лица по справочнику ОКОПФ 15 | name: Код организационно-правовой формы по ОКОПФ 16 | classification: categorical 17 | links: [] 18 | examples: [] 19 | wikidata_property: '' 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/US/industry/us_naicscode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - industry 3 | country: 4 | - US 5 | doc: Classification in the North American Industry Classification System 6 | id: naicscode 7 | is_pii: 'False' 8 | links: 9 | - type: wikidata 10 | url: https://www.wikidata.org/wiki/Property:P3224 11 | langs: 12 | - en 13 | name: NAICS code 14 | wikidata_property: P3224 15 | regexp: \d{2,6} 16 | examples: 17 | - value: '517919' 18 | description: CelerSMS 19 | - value: '512240' 20 | description: recording studio 21 | classification: categorical 22 | translations: {} 23 | -------------------------------------------------------------------------------- /data/datatypes/any/cryptocurrency/wifaddruncomp.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - cryptocurrency 3 | id: wifaddruncomp 4 | 
doc: WIF, uncompressed pubkey 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: other 10 | url: https://allprivatekeys.com/bitcoin-address-format 11 | name: WIF, uncompressed pubkey 12 | examples: 13 | - value: 5Hwgr3u458GLafKBgxtssHSPqJnYoGrSzgQsPwLFhLNYskDPyyA 14 | description: Example from https://allprivatekeys.com/bitcoin-address-format 15 | classification: identifier 16 | wikidata_property: '' 17 | translations: {} 18 | regexp: '' 19 | -------------------------------------------------------------------------------- /data/datatypes/any/geo/continent.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | doc: A continent is any of several large landmasses. Related to UN unm49 dataclass 4 | id: continent 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: wikipedia 10 | url: https://en.wikipedia.org/wiki/Continent 11 | - type: wikidata 12 | url: https://www.wikidata.org/wiki/Property:P30 13 | wikidata_property: P30 14 | examples: 15 | - value: Eurasia 16 | description: Eurasia 17 | name: Continent name or identifier 18 | classification: categorical 19 | translations: {} 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/any/government/legislationname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | doc: A legal document such as an act, decree, bill, etc. (enforceable or not) or a 4 | component of a legal act (like an article). 
5 | id: legislationname 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: schema.org 11 | url: https://schema.org/Legislation 12 | name: Legislation name 13 | translations: 14 | ru: 15 | doc: Название нормативного документа 16 | name: Название нормативного документа 17 | examples: [] 18 | wikidata_property: '' 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/langs.yaml: -------------------------------------------------------------------------------- 1 | - id: common 2 | name: Common to most languages 3 | - id: en 4 | name: English 5 | - id: ru 6 | name: Russian 7 | - id: fr 8 | name: French 9 | - id: es 10 | name: Spanish 11 | - id: it 12 | name: Italian 13 | - id: pt 14 | name: Portuguese 15 | - id: da 16 | name: Danish 17 | - id: fi 18 | name: Finnish 19 | - id: se 20 | name: Swedish 21 | - id: th 22 | name: Thai 23 | - id: de 24 | name: German 25 | 26 | # Added to support languages referenced in datatype definitions 27 | - id: nl 28 | name: Dutch -------------------------------------------------------------------------------- /data/datatypes/any/science/openalexid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - science 3 | - identifiers 4 | doc: Identifier for works, authors, institutes, venues, concepts/subjects in OpenAlex 5 | id: openalexid 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: other 11 | url: https://docs.openalex.org/about-the-data#the-openalex-id 12 | - type: wikidata 13 | url: https://www.wikidata.org/wiki/Property:P10283 14 | name: OpenAlex ID 15 | regexp: '[ACIVW][1-9]\d{3,9}' 16 | wikidata_property: P10283 17 | classification: identifier 18 | examples: [] 19 | translations: {} 20 | -------------------------------------------------------------------------------- /data/datatypes/any/software/tlsh.yaml: -------------------------------------------------------------------------------- 1 | 
categories: 2 | - software 3 | doc: tlsh is a hash used by Trend Micro which can be used for similarity comparisons. 4 | id: tlsh 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: other 10 | url: https://developers.virustotal.com/reference/files-tlsh 11 | name: TLSH hash 12 | examples: 13 | - value: T18F535B52F19146B7CD502278DF2CEB3199BFE134871816E3634882B6576B0D1AB7E3CA 14 | description: Virus total file Twain_32.dll 15 | classification: identifier 16 | wikidata_property: '' 17 | translations: {} 18 | regexp: '' 19 | -------------------------------------------------------------------------------- /data/datatypes/GB/geo/uk_wardcode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - GB 5 | doc: The wards and electoral divisions in the United Kingdom are electoral districts 6 | at sub-national level represented by one or more councillors. 7 | id: ukwardcode 8 | is_pii: 'False' 9 | langs: 10 | - en 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/Wards_and_electoral_divisions_of_the_United_Kingdom 14 | name: Ward code (United Kingdom) 15 | classification: categorical 16 | examples: [] 17 | wikidata_property: '' 18 | translations: {} 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/GB/geo/uk_wardname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - GB 5 | doc: The wards and electoral divisions in the United Kingdom are electoral districts 6 | at sub-national level represented by one or more councillors. 
7 | id: ukwardname 8 | is_pii: 'False' 9 | langs: 10 | - en 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/Wards_and_electoral_divisions_of_the_United_Kingdom 14 | name: Ward name (United Kingdom) 15 | classification: categorical 16 | examples: [] 17 | wikidata_property: '' 18 | translations: {} 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/US/telecom/fccid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - telecom 3 | doc: Identifier of a product assigned by the grantee in an application to the Federal 4 | Communications Commission of the United States 5 | id: fccid 6 | is_pii: 'False' 7 | country: 8 | - US 9 | links: 10 | - type: wikidata 11 | url: https://www.wikidata.org/wiki/Property:P7290 12 | langs: 13 | - en 14 | wikidata_property: P7290 15 | name: FCC Product Code (FCC ID) 16 | examples: 17 | - value: A1395 18 | description: iPad 2, model A1395 19 | classification: identifier 20 | translations: {} 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/utctimezoneoffiset.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: Difference between Coordinated Universal Time (UTC) and this timezone. 
4 | id: utctimezoneoffset 5 | langs: 6 | - common 7 | links: 8 | - type: wikidata 9 | url: https://www.wikidata.org/wiki/Property:P2907 10 | name: UTC Timezone offset 11 | semantic_type: timezone 12 | wikidata_property: P2907 13 | regexp: ^UTC(?:Z|[+-](?:2[0-3]|[01][0-9]):[0-5][0-9])$ 14 | examples: 15 | - value: UTC+08:00 16 | description: 8 hour 17 | - value: UTC+01:24 18 | description: 1.4 hour 19 | translations: {} 20 | -------------------------------------------------------------------------------- /data/datatypes/any/software/vhash.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - software 3 | doc: Virustotal in-house similarity clustering algorithm value, based on a simple 4 | structural feature hash allows you to find similar files 5 | id: vhash 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: other 11 | url: https://developers.virustotal.com/reference/files 12 | name: VHash 13 | examples: 14 | - value: 1640566d1555156az3b26kz1fez5 15 | description: Virus total file Twain_32.dll 16 | classification: identifier 17 | wikidata_property: '' 18 | translations: {} 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/FR/persons/fr_cni.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - FR 6 | doc: The French national identity card (CNI) is an official identity document consisting 7 | of an electronic ID-1 card bearing a photograph, name and address. 
8 | id: frcni 9 | is_pii: 'True' 10 | langs: 11 | - fr 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/National_identity_card_(France) 15 | regexp: ^[0-9]{12}$ 16 | name: French national identity card 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/datatypes/FR/geo/fr_siretcode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - FR 5 | doc: 'The SIRET code (French: Système d’identification du répertoire des établissements), 6 | or SIRET number,[1] is an INSEE code which allows the geographic identification 7 | of any French establishment or business. ' 8 | id: siretcode 9 | is_pii: 'False' 10 | langs: 11 | - fr 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/SIRET_code 15 | name: SIRET Code 16 | classification: categorical 17 | examples: [] 18 | wikidata_property: '' 19 | translations: {} 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/RU/government/ru_budgetname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | country: 4 | - RU 5 | doc: Name of the budget of Russian government at federal, regional or local level 6 | id: budgetname 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | name: Russian government budget name 11 | translations: 12 | ru: 13 | doc: Название бюджета по справочнику наименований и кодов бюджета ведомом Минфином 14 | России и Федеральным казначейством. 
15 | name: Наименование бюджета 16 | classification: categorical 17 | links: [] 18 | examples: [] 19 | wikidata_property: '' 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/any/companies/en_companyname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - companies 3 | doc: English written company/business/legal name of business entity 4 | id: encompanyname 5 | langs: 6 | - en 7 | name: Company/business name in English 8 | semantic_type: orgname 9 | examples: 10 | - value: AARDMAN ANIMATIONS LIMITED 11 | description: AARDMAN ANIMATIONS LIMITED 12 | - value: ULSTER INNOVATION FUND LP 13 | description: ULSTER INNOVATION FUND LP 14 | - value: GREEN HILL TRUST 15 | description: GREEN HILL TRUST 16 | links: [] 17 | wikidata_property: '' 18 | translations: {} 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/any/persons/jobtitle.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - persons 3 | - companies 4 | country: 5 | - RU 6 | doc: Work positions / job title 7 | id: jobtitle 8 | is_pii: 'False' 9 | langs: 10 | - common 11 | links: 12 | - type: schema.org 13 | url: https://schema.org/jobTitle 14 | name: Work position / Job title 15 | patterns: 16 | - rusworkposition 17 | translations: 18 | ru: 19 | doc: Должность персоны в организации 20 | name: Должность 21 | examples: 22 | - value: Aid worker/humanitarian worker 23 | description: Aid worker job title 24 | wikidata_property: '' 25 | regexp: '' 26 | -------------------------------------------------------------------------------- /data/datatypes/AU/companies/au_acn.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - companies 3 | country: 4 | - AU 5 | doc: An Australian Company Number (usually shortened to ACN) is a 
unique identifier 6 | required by every company registered under Australia’s Corporations Act 2001 (Cth). 7 | id: auacn 8 | is_pii: 'False' 9 | langs: 10 | - en 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/Australian_Company_Number 14 | regexp: \d{3}\s\d{3}\s\d{3} 15 | name: Australian Company Number (ACN) 16 | classification: identifier 17 | examples: [] 18 | wikidata_property: '' 19 | translations: {} 20 | -------------------------------------------------------------------------------- /data/datatypes/RU/companies/ru_inn.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - companies 3 | country: 4 | - RU 5 | doc: Russian organization/person tax identification number (INN) is unique for each 6 | person and organization. 7 | id: inn 8 | is_pii: 'False' 9 | langs: 10 | - ru 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/VAT_identification_number 14 | name: INN (Russian tax identifier) 15 | regexp: \d{10} 16 | examples: 17 | - value: '' 18 | description: '' 19 | - value: '' 20 | description: '' 21 | classification: identifier 22 | wikidata_property: '' 23 | translations: {} 24 | -------------------------------------------------------------------------------- /data/datatypes/US/finances/us_bankaccount.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | country: 4 | - RU 5 | doc: A bank account at a financial institution in the United States 6 | id: usbankaccount 7 | is_pii: 'False' 8 | langs: 9 | - common 10 | links: 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/Bank_account 13 | name: Bank account in United States 14 | regexp: '[0-9]{8,17}' 15 | translations: 16 | ru: 17 | doc: Вид банковского счета открываемого в банках США 18 | name: Банковский счет в банке США 19 | semantic_type: bankaccount 20 | examples: [] 21 | wikidata_property: '' 22 | 
-------------------------------------------------------------------------------- /data/datatypes/any/datetime/ianatimezoneid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: Identifier of a time zone in the IANA Time Zone Database (tz database), such as Europe/London. 4 | id: ianatimezoneid 5 | langs: 6 | - common 7 | links: 8 | - type: wikidata 9 | url: https://www.wikidata.org/wiki/Property:P6687 10 | name: IANA Timezone ID 11 | semantic_type: timezone 12 | wikidata_property: P6687 13 | regexp: ^UTC(?:Z|[+-](?:2[0-3]|[01][0-9]):[0-5][0-9])$ 14 | examples: 15 | - value: Australia/Sydney 16 | description: Australia/Sydney 17 | - value: Europe/London 18 | description: Europe/London 19 | translations: {} 20 | -------------------------------------------------------------------------------- /data/datatypes/BR/persons/br_cpf.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - BR 6 | doc: The CPF number (Cadastro de Pessoas Fisicas; Portuguese for Natural Persons Register) 7 | is the Brazilian individual taxpayer registry, since its creation in 1965. 8 | id: brcpf 9 | is_pii: 'True' 10 | langs: 11 | - pt 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/CPF_number 15 | regexp: \d{3}\.\d{3}\.\d{3}\-\d{2} 16 | name: CPF number (Cadastro de Pessoas Fisicas) 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/datatypes/FI/persons/fi_natid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - FI 6 | doc: The Finnish identity card is one of two official identity documents in Finland, 7 | the other being the Finnish passport. 
8 | id: fiidcard 9 | is_pii: 'True' 10 | langs: 11 | - fi 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/Finnish_identity_card 15 | regexp: ^(0[1-9]|[1-2][0-9]|3[0-1])(0[1-9]|1[0-2])[0-9]{2}[a+-][0-9]{3}[A-z0-9]$ 16 | name: Finnish identity card 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/datatypes/GB/finances/uk_sedol.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | doc: SEDOL stands for Stock Exchange Daily Official List, a list of security identifiers 4 | used in the United Kingdom and Ireland for clearing purposes. 5 | id: sedol 6 | is_pii: 'False' 7 | country: 8 | - GB 9 | - IE 10 | langs: 11 | - common 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/SEDOL 15 | name: SEDOL identifier 16 | regexp: '[0-9.]{7}' 17 | examples: 18 | - value: 0263494 19 | description: BAE Systems 20 | classification: identifier 21 | wikidata_property: '' 22 | translations: {} 23 | -------------------------------------------------------------------------------- /data/datatypes/any/finances/currency.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | doc: A currency in the most specific sense is money in any form when in use or circulation 4 | as a medium of exchange, especially circulating banknotes and coins. 
5 | id: currency 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: wikipedia 11 | url: https://en.wikipedia.org/wiki/Currency 12 | name: Currency 13 | patterns: 14 | - iso4217curcode 15 | - iso4217curname 16 | - okvname 17 | - okvcode 18 | classification: categorical 19 | examples: [] 20 | wikidata_property: '' 21 | translations: {} 22 | regexp: '' 23 | -------------------------------------------------------------------------------- /data/datatypes/any/internet/tld.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - internet 3 | doc: A top-level domain (TLD) is one of the domains at the highest level in the hierarchical 4 | Domain Name System of the Internet after the root domain.[10] The top-level domain 5 | names are installed in the root zone of the name space. 6 | id: tld 7 | is_pii: 'False' 8 | langs: 9 | - en 10 | links: 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/Top-level_domain 13 | name: Top level domain (TLD) 14 | classification: identifier 15 | examples: [] 16 | wikidata_property: '' 17 | translations: {} 18 | regexp: '' 19 | -------------------------------------------------------------------------------- /data/datatypes/any/science/academicdegree.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - persons 3 | - science 4 | doc: Academic degree title 5 | id: academicdegree 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: wikipedia 11 | url: https://en.wikipedia.org/wiki/Academic_degree 12 | name: Academic degree 13 | patterns: 14 | - rusdegree 15 | translations: 16 | ru: 17 | doc: Научная степень персоны на русском языке. 
Например, кандидат технических 18 | наук 19 | name: Научная степень (на русском) 20 | classification: categorical 21 | examples: [] 22 | wikidata_property: '' 23 | regexp: '' 24 | -------------------------------------------------------------------------------- /data/datatypes/any/geo/wbregion.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | doc: World Bank regions are territorial classification of countries 4 | id: wbregion 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: other 10 | url: https://datahelpdesk.worldbank.org/knowledgebase/articles/906519-world-bank-country-and-lending-groups 11 | wikidata_property: P30 12 | examples: 13 | - value: South Asia 14 | description: South Asia 15 | - value: Europe & Central Asia 16 | description: Europe & Central Asia 17 | name: World Bank region 18 | classification: categorical 19 | translations: {} 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/any/telecom/imei.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - telecom 3 | - pii 4 | doc: The International Mobile Equipment Identity (IMEI) is a numeric identifier, usually 5 | unique for 3GPP and iDEN mobile phones, as well as some satellite phones. 6 | id: imei 7 | is_pii: 'True' 8 | links: 9 | - type: wikipedia 10 | url: https://en.wikipedia.org/wiki/International_Mobile_Equipment_Identity 11 | langs: 12 | - common 13 | regexp: \d{2}-\d{6}-\d{6}-\d\d? 
14 | name: International Mobile Equipment Identity (IMEI) 15 | classification: identifier 16 | examples: [] 17 | wikidata_property: '' 18 | translations: {} 19 | -------------------------------------------------------------------------------- /data/datatypes/BE/persons/be_natcardnum.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - BE 6 | doc: 'Belgium identity card is a national identity card issued to all citizens of 7 | Belgium aged 12 years old and above. ' 8 | id: benatcardid 9 | is_pii: 'True' 10 | langs: 11 | - fr 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/Belgian_identity_card 15 | name: Belgium national card ID 16 | regexp: '[0-9]{2}\.(0[1-9]|1[0-2])\.(0[1-9]|[1-2][0-9]|3[0-1])-[0-9]{3}\.[0-9]{2}' 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/datatypes/any/cryptography/sct.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - cryptography 3 | doc: Signed certificate timestamp (SCT). 
The SCT is the log's promise to incorporate 4 | the certificate in the Merkle Tree within a fixed amount of time known as the Maximum 5 | Merge Delay (MMD). 6 | id: sctcrypto 7 | is_pii: 'False' 8 | langs: 9 | - common 10 | links: 11 | - type: other 12 | url: https://www.rfc-editor.org/rfc/rfc6962.html 13 | regexp: ^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$ 14 | name: Signed certificate timestamp (SCT) 15 | examples: [] 16 | wikidata_property: '' 17 | translations: {} 18 | -------------------------------------------------------------------------------- /data/datatypes/any/objectids/wikidataid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - objectids 3 | doc: Wikidata makes use of identifiers for both internal organization of the knowledge 4 | base and for its connection to other databases. 5 | id: wikidataid 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: wikidata 11 | url: https://www.wikidata.org/wiki/Wikidata:Identifiers 12 | name: Wikidata Id 13 | examples: 14 | - value: Q12345 15 | description: Count von Count 16 | - value: Q234511 17 | description: Gurk 18 | classification: identifier 19 | wikidata_property: '' 20 | translations: {} 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/schemes/tool.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": { 3 | "required": true, 4 | "type": "string" 5 | }, 6 | "id": { 7 | "required": true, 8 | "type": "string" 9 | }, 10 | "doc": { 11 | "required": true, 12 | "type": "string" 13 | }, 14 | "category": { 15 | "required": true, 16 | "type": "string" 17 | }, 18 | "website": { 19 | "required": false, 20 | "type": "string" 21 | }, 22 | "supported_types": { 23 | "required": false, 24 | "type": "list" 25 | } 26 | } 27 | 28 | -------------------------------------------------------------------------------- 
/data/datatypes/GB/geo/uk_toid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - GB 5 | doc: TOpographic IDentifier assigned by the Ordnance Survey to identify a feature 6 | in Great Britain 7 | id: toid 8 | is_pii: 'False' 9 | langs: 10 | - en 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/TOID 14 | - type: wikidata 15 | url: https://www.wikidata.org/wiki/Property:P3120 16 | wikidata_property: P3120 17 | name: UK TOpographic IDentifier (TOID) 18 | classification: identifier 19 | examples: 20 | - value: '7000000000013965' 21 | description: Surrey 22 | regexp: \d{16} 23 | translations: {} 24 | -------------------------------------------------------------------------------- /data/datatypes/SG/persons/sg_nric.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - SG 6 | doc: The National Registration Identity Card (NRIC) is the compulsory identity document 7 | issued to citizens and permanent residents of Singapore. 8 | id: sgnric 9 | is_pii: 'True' 10 | langs: 11 | - en 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/National_Registration_Identity_Card 15 | regexp: (?i)([STFG][0-9]{7}[A-Z]) 16 | name: The Singapore National Registration Identity Card (NRIC) 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/tools/pii/scrubadub.yaml: -------------------------------------------------------------------------------- 1 | id: scrubadub 2 | category: pii 3 | name: scrubadub 4 | doc: 'Remove personally identifiable information from free text. Sometimes we have additional metadata about the people we wish to anonymize. Other times we dont. 
This package makes it easy to seamlessly scrub personal information from free text, without compromising the privacy of the people we are trying to protect.' 5 | website: https://github.com/LeapBeyond/scrubadub 6 | supported_types: 7 | - email 8 | - phone 9 | - birthday 10 | - bankcard 11 | - postindex 12 | - address 13 | - person_fullname 14 | -------------------------------------------------------------------------------- /data/datatypes/AU/companies/au_abn.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - companies 3 | country: 4 | - AU 5 | doc: The Australian Business Number (ABN) is a unique 11-digit identifier issued by 6 | the Australian Business Register (ABR) which is operated by the Australian Taxation 7 | Office (ATO). 8 | id: auabn 9 | is_pii: 'False' 10 | langs: 11 | - en 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/Australian_Business_Number 15 | regexp: \d{2}\s\d{3}\s\d{3}\s\d{3} 16 | name: Australian Business Number (ABN) 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/datatypes/AU/persons/au_tfn_number.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - AU 6 | doc: The tax file number (TFN) is a unique identifier issued by the Australian Taxation 7 | Office to each taxpaying entity — an individual, company, superannuation fund, partnership, 8 | or trust. 
9 | id: autfn 10 | is_pii: 'True' 11 | langs: 12 | - en 13 | links: 14 | - type: wikipedia 15 | url: https://en.wikipedia.org/wiki/Tax_file_number 16 | regexp: \d{3}\s\d{3}\s\d{3} 17 | name: Australian Tax File Number (TFN) 18 | classification: identifier 19 | examples: [] 20 | wikidata_property: '' 21 | translations: {} 22 | -------------------------------------------------------------------------------- /data/datatypes/EU/industry/eu_cpvcode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - industry 3 | country: 4 | - EU 5 | doc: massively multilingual and public domain taxonomy legislated by the EU for goods 6 | and services 7 | id: cpvcode 8 | is_pii: 'False' 9 | links: 10 | - type: wikidata 11 | url: https://www.wikidata.org/wiki/Property:P5417 12 | langs: 13 | - en 14 | name: CPV (Common Procurement Vocabulary) code 15 | wikidata_property: P5417 16 | regexp: \d{8} 17 | examples: 18 | - value: '24110000' 19 | description: industrial gas 20 | - value: '24111700' 21 | description: nitrogen 22 | classification: categorical 23 | translations: {} 24 | -------------------------------------------------------------------------------- /data/datatypes/GB/medical/uk_bnfcode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - medical 3 | country: 4 | - GB 5 | doc: The British National Formulary (BNF) is a reference book containing the standard 6 | list of medicines used in UK prescribing. It gives information on the indications, 7 | dosages and side effects for over 70,000 medicines. 
8 | id: ukbnfcode 9 | is_pii: 'False' 10 | langs: 11 | - en 12 | links: 13 | - type: other 14 | url: https://www.thedatalab.org/blog/2017/04/prescribing-data-bnf-codes/ 15 | name: BNF code 16 | classification: identifier 17 | examples: [] 18 | wikidata_property: '' 19 | translations: {} 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/GB/persons/uk_nino.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - GB 6 | doc: The National Insurance number is a number used in the United Kingdom in the administration 7 | of the National Insurance or social security system. It is also used for some purposes 8 | in the UK tax system. 9 | id: uknino 10 | is_pii: 'True' 11 | langs: 12 | - en 13 | links: 14 | - type: wikipedia 15 | url: https://en.wikipedia.org/wiki/National_Insurance_number 16 | name: UK National Insurance number 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/datatypes/RU/companies/ru_kpp.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - companies 3 | country: 4 | - RU 5 | doc: Russian company supplemental registration code (KPP). Used in combination with 6 | Taxpayer INN code 7 | id: kpp 8 | is_pii: 'False' 9 | langs: 10 | - ru 11 | name: Russian company supplemental registration code (KPP) 12 | translations: 13 | ru: 14 | doc: Вспомогательный код используемый вместе с кодом ИНН для идентификации налоговой 15 | принадлежности компании. 
16 | name: Код постановки на учёт (КПП) 17 | classification: identifier 18 | links: [] 19 | examples: [] 20 | wikidata_property: '' 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/datatypes/US/persons/us_npi.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - US 6 | doc: A National Provider Identifier (NPI) is a unique 10-digit identification number 7 | issued to health care providers in the United States by the Centers for Medicare 8 | and Medicaid Services (CMS). 9 | id: usnpi 10 | langs: 11 | - en 12 | is_pii: 'True' 13 | links: 14 | - type: wikipedia 15 | url: https://en.wikipedia.org/wiki/National_Provider_Identifier 16 | name: US National Provider Identifier (NPI) 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/datatypes/any/shipping/s10upu.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - objectids 3 | - shipping 4 | doc: The UPU S10 standard defines a system for assigning 13-character identifiers 5 | to international postal items for the purpose of tracking and tracing them during 6 | shipping. 
7 | id: s10upu 8 | is_pii: 'False' 9 | langs: 10 | - common 11 | parent: 12 | type: tracknum 13 | links: 14 | - type: wikipedia 15 | url: https://en.wikipedia.org/wiki/S10_(UPU_standard) 16 | regexp: ^[A-z]{2}\d{8}\d{1}[A-z]{2}$ 17 | name: S10 (UPU standard) 18 | classification: identifier 19 | examples: [] 20 | wikidata_property: '' 21 | translations: {} 22 | -------------------------------------------------------------------------------- /data/tools/other/metabase.yaml: -------------------------------------------------------------------------------- 1 | id: metabase 2 | category: other 3 | name: Metabase 4 | doc: 'Metabase is a simple and powerful analytics tool which lets anyone learn and make decisions from their company s data no technical knowledge required.' 5 | website: https://www.metabase.com/docs/latest/users-guide/field-types.html 6 | supported_types: 7 | - city 8 | - country 9 | - latitude 10 | - longitude 11 | - usstate 12 | - postindex 13 | - birthday 14 | - company 15 | - email 16 | - username 17 | - datetime 18 | - timestamp 19 | - time 20 | - date 21 | - url 22 | 23 | -------------------------------------------------------------------------------- /data/datatypes/AU/persons/au_medicare.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - AU 6 | doc: Medicare number is a unique identifier issued by Australian Government that enables 7 | the cardholder to receive a rebates of medical expenses under Australia Medicare 8 | system. 
9 | id: aumedicarenum 10 | is_pii: 'True' 11 | langs: 12 | - en 13 | links: 14 | - type: wikipedia 15 | url: https://en.wikipedia.org/wiki/Medicare_card_(Australia) 16 | regexp: '[2-6]\d{3}\s\d{5}\s\d' 17 | name: Australian medicare number 18 | classification: identifier 19 | examples: [] 20 | wikidata_property: '' 21 | translations: {} 22 | -------------------------------------------------------------------------------- /data/datatypes/DE/medical/opscode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - medical 3 | id: opscode 4 | is_pii: 'False' 5 | langs: 6 | - de 7 | name: OPS-Prozedurcode 8 | doc: DE datatype for 'OPS-Prozedurcode anhand Feldnamen' (opscode) from rules in context 9 | 'medical'. 10 | classification: identifier 11 | country: 12 | - DE 13 | links: [] 14 | examples: 15 | - value: '1234' 16 | description: OPS procedure code with 4 digits without fractional part 17 | - value: '1234.5' 18 | description: OPS procedure code with 4 digits and a 1-digit fractional part 19 | wikidata_property: '' 20 | translations: {} 21 | regexp: ^[0-9]{4}(\.[0-9]{1,2})?$ 22 | -------------------------------------------------------------------------------- /data/datatypes/ES/persons/es_nie_number.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - ES 6 | doc: The NIE is a tax identification number in Spain, known in Spanish as the NIE, 7 | or more formally the Numero de identidad de extranjero 8 | id: esnie 9 | is_pii: 'True' 10 | langs: 11 | - es 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/NIE_number 15 | regexp: ^(X(-|\.)?0?\d{7}(-|\.)?[A-Z]|[A-Z](-|\.)?\d{7}(-|\.)?[0-9A-Z]|\d{8}(-|\.)?[A-Z])$ 16 | name: Spanish Foreigner Identity Number (NIE) 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | 
-------------------------------------------------------------------------------- /data/datatypes/EU/geo/eu_nuts.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - EU 5 | doc: European Union geographic code, identifier for a region per NUTS 6 | id: eunuts 7 | is_pii: 'False' 8 | links: 9 | - type: wikidata 10 | url: https://www.wikidata.org/wiki/Property:P605 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/Nomenclature_of_Territorial_Units_for_Statistics 13 | langs: 14 | - en 15 | name: NUTS code (EU) 16 | wikidata_property: P605 17 | regexp: '[A-Z]{2}[A-Z0-9]{0,3}' 18 | examples: 19 | - value: BE10 20 | description: Brussels-Capital Region 21 | classification: categorical 22 | translations: {} 23 | -------------------------------------------------------------------------------- /data/datatypes/GB/persons/uk_driver_license.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - GB 6 | doc: In the United Kingdom, a driving licence is the official document which authorises 7 | its holder to operate motor vehicles on highways and other public roads. 
8 | id: ukdriverlic 9 | is_pii: 'True' 10 | langs: 11 | - en 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/Driving_licence_in_the_United_Kingdom 15 | regexp: ^[A-Z9]{5}\d{6}[A-Z9]{2}\d[A-Z]{2}$ 16 | name: UK driver license number 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/datatypes/US/geo/us_fips52.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - US 5 | doc: Two-digit identifier for US states and other associated areas per former Federal 6 | Information Processing Standard FIPS 5-2 standard 7 | id: fips52 8 | is_pii: 'False' 9 | links: 10 | - type: wikidata 11 | url: https://www.wikidata.org/wiki/Property:P5087 12 | langs: 13 | - en 14 | name: FIPS 5-2 numeric code (US states) 15 | wikidata_property: P5087 16 | regexp: \d{2} 17 | examples: 18 | - value: '76' 19 | description: Navassa Island 20 | - value: '70' 21 | description: Palau 22 | classification: categorical 23 | translations: {} 24 | -------------------------------------------------------------------------------- /data/datatypes/any/software/ssdeep.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - software 3 | doc: CTPH hash of the file content. 
4 | id: ssdeep 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: other 10 | url: https://ssdeep-project.github.io/ssdeep/index.html 11 | - type: other 12 | url: https://developers.virustotal.com/reference/ssdeep 13 | name: SSDEEP hash 14 | examples: 15 | - value: 768:uPC0xySqWNPwcKnReqpxORBoWNOMFN5cYsFx1gAmOURksWrk/VwLtkKavNi3IJzU:uPC0xyowcklqHw9xGkLrNLtBiNR 16 | description: Virus total file Twain_32.dll 17 | classification: identifier 18 | wikidata_property: '' 19 | translations: {} 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/EU/transport/eu_cin.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - transport 3 | country: 4 | - EU 5 | doc: The Craft Identification Number (CIN) or Hull Identification Number (HIN), standardised 6 | as EN ISO 10087:2006, is a permanent unique fourteen-digit alphanumeric identifier 7 | issued to all marine vessels in Europe. 
8 | id: eucin 9 | is_pii: 'False' 10 | langs: 11 | - common 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/Craft_Identification_Number 15 | name: Craft Identification Number 16 | classification: identifier 17 | examples: [] 18 | wikidata_property: '' 19 | translations: {} 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/RU/geo/ru_region.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - RU 5 | doc: Code or name of the region (federal subject) in Russia as it is in country Constitution 6 | id: rusregion 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | name: Russian region (federal subject) code or name 11 | patterns: 12 | - rusregionname 13 | - rusregioncode 14 | translations: 15 | ru: 16 | doc: Код или наименование субъекта Российской Федерации по Конституции страны 17 | name: Код или наименование субъекта Российской Федерации 18 | classification: categorical 19 | links: [] 20 | examples: [] 21 | wikidata_property: '' 22 | regexp: '' 23 | -------------------------------------------------------------------------------- /data/datatypes/RU/government/ru_npakind.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | country: 4 | - RU 5 | doc: 'Type of the Russian legal document: order, executive order and so on' 6 | id: runpakind 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | name: Russian regulatory legal act type 11 | parent: 12 | type: legislationtype 13 | translations: 14 | ru: 15 | doc: 'Тип нормативного документа, например: указ, постановление, приказ и др., 16 | на русском языке' 17 | name: Тип нормативного документа в России/на русском языке 18 | classification: categorical 19 | links: [] 20 | examples: [] 21 | wikidata_property: '' 22 | regexp: '' 23 | 
-------------------------------------------------------------------------------- /data/datatypes/any/companies/orgname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - companies 3 | doc: Name of the organization/business/company in any language 4 | id: orgname 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | name: Company or organization name 9 | parent: 10 | type: name 11 | links: 12 | - type: schema.org 13 | url: https://schema.org/legalName 14 | patterns: 15 | - encompanyname 16 | - rucompanyname 17 | translations: 18 | ru: 19 | doc: Название организации, бизнеса или компании на любом языке 20 | name: Название организации или компании 21 | classification: identifier 22 | examples: [] 23 | wikidata_property: '' 24 | regexp: '' 25 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/duration.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: The amount of time elapsed between two events 4 | id: duration 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: wikipedia 10 | url: https://en.wikipedia.org/wiki/ISO_8601#Durations 11 | - type: schema.org 12 | url: https://schema.org/duration 13 | - type: other 14 | url: https://ddialliance.org/Specification/DDI-CV/DateType_1.1.html 15 | name: Time duration 16 | translations: 17 | ru: 18 | doc: Длительность события в музыке, мероприятии и тд. 
19 | name: Длительность 20 | examples: [] 21 | wikidata_property: '' 22 | regexp: '' 23 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/timespan.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: 'The TimeSpan structure represents a length of time (duration of time or elapsed 4 | time), and may be expressed as start/end, start/duration, or duration/end. Start, 5 | end, and duration are documented using the designated DateTime structures. Examples: 6 | start/end 2018-02-22T13:0' 7 | id: timespan 8 | is_pii: 'False' 9 | links: 10 | - type: other 11 | url: https://ddialliance.org/Specification/DDI-CV/DateType_1.1.html 12 | langs: 13 | - common 14 | name: Timespan 15 | examples: [] 16 | wikidata_property: '' 17 | translations: {} 18 | regexp: '' 19 | -------------------------------------------------------------------------------- /data/datatypes/DE/persons/de_personalausweis.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - DE 6 | doc: 'The German Identity Card is issued to German citizens by local registration 7 | offices in Germany and diplomatic missions abroad, while they are produced at the 8 | Bundesdruckerei in Berlin. 
' 9 | id: depersonalausweis 10 | is_pii: 'True' 11 | langs: 12 | - de 13 | links: 14 | - type: wikipedia 15 | url: https://en.wikipedia.org/wiki/German_identity_card 16 | regexp: ^[0-9]{12}$ 17 | name: Germany national identity card 18 | classification: identifier 19 | examples: [] 20 | wikidata_property: '' 21 | translations: {} 22 | -------------------------------------------------------------------------------- /data/datatypes/RU/government/ru_kosgucode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | country: 4 | - RU 5 | doc: Russian Public Administration Sector Classification Code (KOSGU) used in Russian 6 | budget planning and procurement 7 | id: kosgucode 8 | is_pii: 'False' 9 | langs: 10 | - ru 11 | name: Russian Public Administration Sector Classification Code (KOSGU) 12 | translations: 13 | ru: 14 | doc: Код классификации секторов государственного управления используемые в Российской 15 | бюджетной системе 16 | name: Код КОСГУ 17 | classification: categorical 18 | links: [] 19 | examples: [] 20 | wikidata_property: '' 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/datatypes/RU/government/ru_kvrcode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | country: 4 | - RU 5 | doc: Russian budget expense type code (KVR) used in budget planning and procurement 6 | id: kvrcode 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | name: Russian budget expense type code (KVR) 11 | translations: 12 | ru: 13 | doc: Код вида расходов (КВР) используется как часть кода бюджетной классификации 14 | в процессах связанных с бюджетированием и госзакупками в Российской Федерации 15 | name: Код вида расходов (КВР) 16 | classification: categorical 17 | links: [] 18 | examples: [] 19 | wikidata_property: '' 20 | regexp: '' 21 | 
-------------------------------------------------------------------------------- /data/datatypes/RU/persons/ru_snils.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - RU 6 | doc: Russian social insurance identifier. Issued for every insured person by Pension 7 | fund of Russia 8 | id: rusnils 9 | is_pii: 'True' 10 | langs: 11 | - ru 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/SNILS_(Russia) 15 | name: Individual insurance account number (SNILS) 16 | translations: 17 | ru: 18 | doc: Страховой номер индивидуального лицевого счёта физического лица (СНИЛС) 19 | name: Код СНИЛС 20 | classification: identifier 21 | examples: [] 22 | wikidata_property: '' 23 | regexp: '' 24 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/iso8601duration.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: The amount of intervening time in a time interval and are represented by the 4 | format P[n]Y[n]M[n]DT[n]H[n]M[n]S or P[n]W as shown on the aside 5 | id: iso8601duration 6 | langs: 7 | - common 8 | name: Duration 9 | semantic_type: duration 10 | links: 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/ISO_8601#Durations 13 | examples: 14 | - value: P3Y6M4DT12H30M5S 15 | description: Three years, six months, four days, twelve hours, thirty minutes, and 16 | five seconds 17 | wikidata_property: '' 18 | translations: {} 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/any/identifiers/uuid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - identifiers 3 | doc: A universally unique identifier (UUID) is a 128-bit label used for information 4 | in computer systems. 
The term globally unique identifier (GUID) is also used. 5 | id: uuid 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: wikipedia 11 | url: https://en.wikipedia.org/wiki/Universally_unique_identifier 12 | name: A universally unique identifier (UUID) 13 | regexp: '[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{12}' 14 | classification: identifier 15 | examples: [] 16 | wikidata_property: '' 17 | translations: {} 18 | -------------------------------------------------------------------------------- /data/datatypes/RU/government/ru_budgetcode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | country: 4 | - RU 5 | doc: Budget codes issued by Ministry of Finances and Federal Treasury of Russia to 6 | all government and local budgets. 7 | id: budgetcode 8 | is_pii: 'False' 9 | langs: 10 | - ru 11 | name: Russian government budget code 12 | translations: 13 | ru: 14 | doc: 'Уникальный код, присваиваемый каждому отдельному бюджету: федеральному, 15 | субъекта федерации и муниципалитету в Российской Федерации.' 16 | name: Код бюджета 17 | classification: categorical 18 | links: [] 19 | examples: [] 20 | wikidata_property: '' 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/datatypes/RU/government/ru_tofkcode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | country: 4 | - RU 5 | doc: Unique id of the Russian Federal treasury division. 6 | id: tofkcode 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | name: Russian Federal treasury division code 11 | translations: 12 | ru: 13 | doc: Код территориального управления Федерального казначейства РФ. Синхронизовано 14 | с наименованием территориального управления, ведомственный справочник. 
15 | name: Код территориального управления Федерального казначейства РФ 16 | classification: categorical 17 | links: [] 18 | examples: [] 19 | wikidata_property: '' 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/any/common/genre.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - common 3 | doc: Genre of the creative work, broadcast channel or group. 4 | id: genre 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | name: Genre 9 | links: 10 | - type: schema.org 11 | url: https://schema.org/genre 12 | - type: wikidata 13 | url: https://www.wikidata.org/wiki/Property:P136 14 | wikidata_property: P136 15 | translations: 16 | ru: 17 | doc: Жанр картины, музыки, фильма, документа или иного произведения искусства 18 | или иного созданного 19 | name: Жанр 20 | classification: categorical 21 | parent: 22 | type: category 23 | examples: [] 24 | regexp: '' 25 | -------------------------------------------------------------------------------- /data/datatypes/any/identifiers/dcid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - identifiers 3 | doc: Every entity in Data Commons (DC) has a unique identifier, called ‘dcid’. So, 4 | for example, the dcid of California is ‘geoId/06’ and of India is ‘country/IND’. 
5 | id: dcid 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: other 11 | url: https://docs.datacommons.org/bigquery/unique_identifiers.html 12 | - type: wikidata 13 | url: https://www.wikidata.org/wiki/Property:P10730 14 | name: Datacommons unique id 15 | regexp: '[A-Za-z\d_/]+' 16 | wikidata_property: P10730 17 | classification: identifier 18 | examples: [] 19 | translations: {} 20 | -------------------------------------------------------------------------------- /_original/categories.csv: -------------------------------------------------------------------------------- 1 | common Common 2 | pii Personally identifiable information 3 | geo Geographic identifiers 4 | medical Medical/pharma identifiers 5 | finances Finances and banking 6 | datetime Date and time 7 | government Government related 8 | science Scientific 9 | companies Companies/business/organizations 10 | internet Internet 11 | identifiers Universal unique identifiers 12 | objectids Objects identifiers 13 | cryptography Cryptographic identifiers 14 | persons Person related 15 | cryptocurrency Cryptocurrency 16 | transport Transportation 17 | values All measurable values (percentage, amount, etc.) 18 | chemistry Chemistry -------------------------------------------------------------------------------- /data/datatypes/GB/persons/uk_nhs_number.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | - medical 5 | country: 6 | - GB 7 | doc: NHS numbers are the unique numbers allocated to registered users of the three 8 | public health services in England, Wales and the Isle of Man; the three health systems 9 | use a shared numbering scheme. 
10 | id: uknhsnum 11 | is_pii: 'True' 12 | langs: 13 | - en 14 | links: 15 | - type: wikipedia 16 | url: https://en.wikipedia.org/wiki/NHS_number 17 | regexp: ([0-9]{3})[- ]?([0-9]{3})[- ]?([0-9]{4}) 18 | name: UK NHS Number 19 | classification: identifier 20 | examples: [] 21 | wikidata_property: '' 22 | translations: {} 23 | -------------------------------------------------------------------------------- /data/datatypes/RU/government/ru_tofkname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | country: 4 | - RU 5 | doc: Name of the Russian Federal treasury division 6 | id: tofkname 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | name: Russian Federal treasury division name 11 | translations: 12 | ru: 13 | doc: Наименование территориального управления Федерального казначейства РФ. Синхронизовано 14 | с кодом территориального управления, ведомственный справочник. 15 | name: Наименование территориального управления Федерального казначейства РФ 16 | classification: categorical 17 | links: [] 18 | examples: [] 19 | wikidata_property: '' 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/US/finances/us_aba_routing.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | doc: In the United States, an ABA routing transit number (ABA RTN) is a nine-digit 4 | code printed on the bottom of checks to identify the financial institution on which 5 | it was drawn. 
6 | id: abaroutingnum 7 | is_pii: 'False' 8 | langs: 9 | - en 10 | country: 11 | - US 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/ABA_routing_transit_number 15 | regexp: '[0123678]\d{3}-\d{4}-\d' 16 | name: American Banking Association (ABA) routing number 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/datatypes/CA/persons/ca_bc_ptn.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - CA 6 | doc: Each B.C. resident enrolled with the Medical Services Plan (MSP) is given a unique 7 | lifetime identifier for health care called a Personal Health Number (PHN). 8 | id: cabcphn 9 | is_pii: 'True' 10 | langs: 11 | - en 12 | links: 13 | - type: other 14 | url: https://www2.gov.bc.ca/gov/content/health/health-drug-coverage/msp/bc-residents/personal-health-identification 15 | name: Canada British Columbia's Personal Health Number (PHN) 16 | classification: identifier 17 | examples: [] 18 | wikidata_property: '' 19 | translations: {} 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/RU/government/ru_ppocode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | country: 4 | - RU 5 | doc: Russian public legal formation is a legal definition of public entity with right 6 | of administration of its level budget 7 | id: ppocode 8 | is_pii: 'False' 9 | langs: 10 | - ru 11 | name: Russian public legal formation code 12 | translations: 13 | ru: 14 | doc: Публично правовое образование - это организация/субъект федерации с правом 15 | распоряжения бюджетом своего уровня 16 | name: Код публично правового образования в РФ 17 | classification: categorical 18 | links: [] 19 | examples: [] 20 
| wikidata_property: '' 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/datatypes/any/internet/ipv4subnet.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - internet 3 | doc: Range of IPv4 addresses 4 | id: ipv4subnet 5 | is_pii: 'True' 6 | langs: 7 | - common 8 | links: 9 | - type: wikidata 10 | url: https://www.wikidata.org/wiki/Property:P3761 11 | name: IPv4 routing prefix (subnet) 12 | wikidata_property: P3761 13 | regexp: (([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]?|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]?|25[0-5])\/([0-9]|[12][0-9]|3[0-2]) 14 | examples: 15 | - value: 163.1.0.0/16 16 | description: University of Oxford 17 | - value: 193.166.190.0/24 18 | description: Helsinki University Central Hospital 19 | translations: {} 20 | -------------------------------------------------------------------------------- /data/datatypes/RU/government/ru_kvrname.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | country: 4 | - RU 5 | doc: Russian budget expense type code (KVR) used in budget planning and procurement 6 | id: kvrname 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | name: Russian budget expense type code name (KVR) 11 | translations: 12 | ru: 13 | doc: Код вида расходов (КВР) используется как часть кода бюджетной классификации 14 | в процессах связанных с бюджетированием и госзакупками в Российской Федерации 15 | name: Наименование кода вида расходов (КВР) 16 | classification: categorical 17 | links: [] 18 | examples: [] 19 | wikidata_property: '' 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/any/geo/longitude.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | - common 4 | doc: Longitude is a geographic coordinate that specifies 
the east–west position of 5 | a point on the Earth's surface, or the surface of a celestial body. It is an angular 6 | measurement, usually expressed in degrees and denoted by the Greek letter lambda 7 | (λ) 8 | id: longitude 9 | is_pii: 'False' 10 | langs: 11 | - common 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/Longitude 15 | - type: schema.org 16 | url: http://schema.org/longitude 17 | name: Longitude 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/datatypes/RU/government/ru_pponame.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | country: 4 | - RU 5 | doc: Russian public legal formation is a legal definition of public entity with right 6 | of administration of its level budget 7 | id: pponame 8 | is_pii: 'False' 9 | langs: 10 | - ru 11 | name: Russian public legal formation name 12 | translations: 13 | ru: 14 | doc: Публично правовое образование - это организация/субъект федерации с правом 15 | распоряжения бюджетом своего уровня 16 | name: Наименование публично правового образования в РФ 17 | classification: categorical 18 | links: [] 19 | examples: [] 20 | wikidata_property: '' 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/datatypes/US/persons/us_ptin.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - US 6 | doc: 'The Preparer Tax Identification Number (PTIN) is an identification number that 7 | all paid tax return preparers must use on U.S. federal tax returns or claims for 8 | refund submitted to the Internal Revenue Service (IRS). 
' 9 | id: usptin 10 | is_pii: 'True' 11 | langs: 12 | - en 13 | links: 14 | - type: other 15 | url: https://en.wikipedia.org/wiki/Preparer_Tax_Identification_Number 16 | name: US PTIN (Preparer Tax Identification Number) 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/datatypes/any/files/filename.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - common 3 | doc: File name with common notation like somefile.ext 4 | id: filename 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: wikipedia 10 | url: https://en.wikipedia.org/wiki/Filename 11 | - type: datadrivendiscovery 12 | url: https://metadata.datadrivendiscovery.org/types/FileName 13 | name: Name of the file 14 | examples: 15 | - value: sheet.xls 16 | description: sheet.xls 17 | - value: dataset.xml 18 | description: dataset.xml 19 | translations: 20 | ru: 21 | doc: Название файла в файловой системе 22 | name: Название файла 23 | wikidata_property: '' 24 | regexp: '' 25 | -------------------------------------------------------------------------------- /data/datatypes/any/geo/latitude.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | - common 4 | doc: In geography, latitude is a geographic coordinate that specifies the north–south 5 | position of a point on the Earth's surface. Latitude is an angle (defined below) 6 | which ranges from 0° at the Equator to 90° (North or South) at the poles. 
7 | id: latitude 8 | is_pii: 'False' 9 | langs: 10 | - common 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/Latitude 14 | - type: schema.org 15 | url: http://schema.org/latitude 16 | name: latitude 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/datatypes/any/science/doiprefix.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - science 3 | - identifiers 4 | doc: Identifier specific to a DOI registrant 5 | id: doiprefix 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: wikipedia 11 | url: https://en.wikipedia.org/wiki/Digital_object_identifier 12 | - type: wikidata 13 | url: https://www.wikidata.org/wiki/Property:P1662 14 | wikidata_property: P1662 15 | name: Digital Object Identifier (DOI) prefix 16 | translations: 17 | ru: 18 | doc: Уникальный идентификатор для регистратора идентификаторов цифровых объектов 19 | name: Префикс DOI 20 | classification: identifier 21 | examples: [] 22 | regexp: '' 23 | -------------------------------------------------------------------------------- /data/datatypes/any/geo/address.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - common 3 | - pii 4 | - geo 5 | doc: Geographic address, commonly consist of postindex, country, area, town, street 6 | and building 7 | id: address 8 | is_pii: 'True' 9 | langs: 10 | - common 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/Address 14 | name: Address (physical location) 15 | translations: 16 | ru: 17 | doc: Географический или административный адрес местонахождения. Может включать 18 | почтовый индекс, название страны, региона, города, улицы и здания. 
19 | name: Адрес (физическое местонахождение) 20 | examples: [] 21 | wikidata_property: '' 22 | regexp: '' 23 | -------------------------------------------------------------------------------- /data/datatypes/CA/companies/cacorp.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - companies 3 | id: cacorp 4 | is_pii: 'False' 5 | langs: 6 | - en 7 | name: Federal corporation number 8 | doc: CA datatype for 'Federal corporation number' (cacorp) from rules 9 | in context 'companies'. 10 | classification: identifier 11 | country: 12 | - CA 13 | links: [] 14 | examples: 15 | - value: '1234567' 16 | description: Federal corporation number with 7 digits 17 | - value: '12345678' 18 | description: Federal corporation number with 8 digits 19 | - value: '123456789' 20 | description: Federal corporation number with 9 digits 21 | wikidata_property: '' 22 | translations: {} 23 | regexp: ^[0-9]{7,9}$ 24 | -------------------------------------------------------------------------------- /data/datatypes/CA/finances/cacharity.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | id: cacharity 4 | is_pii: 'False' 5 | langs: 6 | - en 7 | name: Charity registration number 8 | doc: CA datatype for 'Charity registration number' (cacharity) from 9 | rules in context 'finances'. 
10 | classification: identifier 11 | country: 12 | - CA 13 | links: [] 14 | examples: 15 | - value: '123456789RR0001' 16 | description: Charity registration number with 9-digit BN, 'RR' program identifier and 4-digit reference 17 | - value: '987654321RR0002' 18 | description: Another valid charity registration number format 19 | wikidata_property: '' 20 | translations: {} 21 | regexp: ^[0-9]{9}RR[0-9]{4}$ 22 | -------------------------------------------------------------------------------- /data/datatypes/RU/geo/ru_kadastr.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - RU 5 | doc: Russian land registry number (kadastroviy nomer, cadaster number) assigned to 6 | each land, territory, building and other geographic administrative objects by Rosreestr 7 | id: rukadastr 8 | is_pii: 'False' 9 | langs: 10 | - ru 11 | name: Russian land registry number 12 | translations: 13 | ru: 14 | doc: Кадастровый номер — уникальный номер объекта недвижимости, присваиваемый 15 | ему при осуществлении кадастрового и технического учёта. 16 | name: Кадастровый номер 17 | classification: identifier 18 | links: [] 19 | examples: [] 20 | wikidata_property: '' 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/datatypes/US/geo/us_zipcode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - US 5 | doc: A ZIP Code is a postal code used by the United States Postal Service (USPS). 6 | Introduced in 1963, the basic format consisted of five digits. In 1983, an extended 7 | ZIP+4 code was introduced; it included the five digits of the ZIP Code, followed 8 | by a hyphen and four digits that designated a more specific location. 
9 | id: uszipcode 10 | langs: 11 | - en 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/ZIP_Code 15 | name: US Postal code 16 | semantic_type: postindex 17 | examples: [] 18 | wikidata_property: '' 19 | translations: {} 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/any/government/legislationtype.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | doc: The type of the legislation. Examples of values are "law", "act", "directive", 4 | "decree", "regulation", "statutory instrument", "loi organique", "reglement grand-ducal", 5 | etc., depending on the country. 6 | id: legislationtype 7 | is_pii: 'False' 8 | langs: 9 | - common 10 | links: 11 | - type: schema.org 12 | url: https://schema.org/legislationType 13 | name: Legislation type 14 | parent: 15 | type: genre 16 | translations: 17 | ru: 18 | doc: Тип нормативного документа 19 | name: Тип нормативного документа 20 | examples: [] 21 | wikidata_property: '' 22 | regexp: '' 23 | -------------------------------------------------------------------------------- /data/datatypes/any/cryptography/crc32.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - cryptography 3 | doc: A cyclic redundancy check (CRC) is an error-detecting code commonly used in digital 4 | networks and storage devices to detect accidental changes to digital data. Blocks 5 | of data entering these systems get a short check value attached, based on the remainder 6 | of a polynomial division of their contents. 
7 | id: crc32 8 | is_pii: 'False' 9 | langs: 10 | - common 11 | parent: 12 | type: numeric 13 | links: 14 | - type: wikipedia 15 | url: https://en.wikipedia.org/wiki/Cyclic_redundancy_check 16 | name: CRC-32 17 | examples: [] 18 | wikidata_property: '' 19 | translations: {} 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/any/objectids/openlibraryid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - objectids 3 | doc: Identifier for a work ("W"), edition ("M") or author ("A") for book data of the 4 | Internet Archive 5 | id: openlibid 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: wikipedia 11 | url: https://en.wikipedia.org/wiki/Open_Library 12 | - type: wikidata 13 | url: https://www.wikidata.org/wiki/Property:P648 14 | name: Open Library ID 15 | regexp: OL[1-9]\d*[AMW] 16 | wikidata_property: P648 17 | examples: 18 | - value: OL36858W 19 | description: Twenty Years After 20 | - value: OL3156833A 21 | description: Anton Chekhov 22 | classification: identifier 23 | translations: {} 24 | -------------------------------------------------------------------------------- /data/datatypes/any/software/richpeheader.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - software 3 | doc: The Rich header is an undocumented header contained within PE files compiled 4 | and linked using the Microsoft toolchain. It contains information about the build 5 | environment that the PE file was created in. 
6 | id: richpeheaderhash 7 | is_pii: 'False' 8 | langs: 9 | - common 10 | links: 11 | - type: other 12 | url: https://github.com/RichHeaderResearch/RichPE 13 | name: Rich PE Header hash 14 | examples: 15 | - value: 6fef15a59affb0e30563fc8baf21ed67 16 | description: Virus total file Twain_32.dll 17 | classification: identifier 18 | wikidata_property: '' 19 | translations: {} 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/DE/finances/handelsregisternr.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | id: handelsregisternr 4 | is_pii: 'False' 5 | langs: 6 | - de 7 | name: Handelsregisternummer anhand Feldname 8 | doc: DE datatype for 'Handelsregisternummer anhand Feldname' (handelsregisternr) from 9 | rules in context 'finances'. 10 | classification: identifier 11 | country: 12 | - DE 13 | links: [] 14 | examples: 15 | - value: 'H12345' 16 | description: Handelsregisternummer with single-letter prefix and 5-digit number 17 | - value: 'A1' 18 | description: Handelsregisternummer with single-letter prefix and 1-digit number 19 | wikidata_property: '' 20 | translations: {} 21 | regexp: '^[A-Z][0-9]{1,6}$' 22 | -------------------------------------------------------------------------------- /data/datatypes/any/geo/postindex.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | - pii 4 | - common 5 | doc: Postal index/code, unique post identifier used in address 6 | id: postindex 7 | is_pii: 'True' 8 | langs: 9 | - common 10 | links: 11 | - type: wikidata 12 | url: https://www.wikidata.org/wiki/Property:P281 13 | - type: schema.org 14 | url: https://schema.org/postalCode 15 | wikidata_property: P281 16 | name: Postal code/index 17 | patterns: 18 | - ukpostalcode 19 | - uszipcode 20 | - ruspostalcode 21 | - frpostcode 22 | - espostcode 23 | translations: 24 | ru: 25 | 
doc: Индекс почтовой связи 26 | name: Почтовый индекс 27 | classification: categorical 28 | examples: [] 29 | regexp: '' 30 | -------------------------------------------------------------------------------- /data/datatypes/any/shipping/unpackaginggroup.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - transport 3 | - chemistry 4 | doc: Packaging code according to UN transportation rules 5 | id: unpkggroup 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: wikidata 11 | url: https://www.wikidata.org/wiki/Property:P876 12 | name: UN Packaging group 13 | wikidata_property: P876 14 | regexp: '(I\/II)|(II\/III)|I{1,3}' 15 | examples: 16 | - value: 'I' 17 | description: sodium azide 18 | - value: 'II' 19 | description: hydrofluoric acid 20 | translations: 21 | ru: 22 | doc: 'Код группы упаковки ООН' 23 | name: Код группы упаковки ООН 24 | classification: categorical -------------------------------------------------------------------------------- /data/datatypes/any/telecom/msisdn.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - telecom 3 | - pii 4 | doc: MSISDN is a number uniquely identifying a subscription in a Global System for 5 | Mobile communications or a Universal Mobile Telecommunications System mobile network. 6 | It is the mapping of the telephone number to the subscriber identity module in a 7 | mobile or cellular phone. 
8 | id: msisdn 9 | is_pii: 'True' 10 | links: 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/MSISDN 13 | regexp: ^[1-9][0-9]{10,14}$ 14 | langs: 15 | - common 16 | name: Mobile Subscriber ISDN Number (MSISDN) 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | -------------------------------------------------------------------------------- /data/datatypes/AR/persons/ar_dni.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - AR 6 | doc: Documento Nacional de Identidad or DNI (which means National Identity Document) 7 | is the main identity document for Argentine citizens, as well as temporary or permanent 8 | resident aliens (DNI Extranjero). 9 | id: ardni 10 | is_pii: 'True' 11 | langs: 12 | - es 13 | links: 14 | - type: wikipedia 15 | url: https://en.wikipedia.org/wiki/Documento_Nacional_de_Identidad_(Argentina) 16 | regexp: '[0-9]{2}[\.]{1}?[0-9]{3}[\.]{1}?[0-9]{3}' 17 | name: Argentina DNI (National Identity Document) 18 | classification: identifier 19 | examples: [] 20 | wikidata_property: '' 21 | translations: {} 22 | -------------------------------------------------------------------------------- /data/datatypes/US/persons/us_itin.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - US 6 | doc: A Taxpayer Identification Number (TIN) is an identifying number used for tax 7 | purposes in the United States and in other countries under the Common Reporting 8 | Standard. 
9 | id: usitin 10 | is_pii: 'True' 11 | langs: 12 | - en 13 | links: 14 | - type: wikipedia 15 | url: https://en.wikipedia.org/wiki/Taxpayer_Identification_Number 16 | regexp: (9\d{2})[- ]{1}((7[0-9]{1}|8[0-8]{1})|(9[0-2]{1})|(9[4-9]{1}))[- ]{1}(\d{4}) 17 | name: US ITIN (Individual Taxpayer Identification Number) 18 | classification: identifier 19 | examples: [] 20 | wikidata_property: '' 21 | translations: {} 22 | -------------------------------------------------------------------------------- /data/datatypes/US/persons/us_ssn.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - US 6 | doc: In the United States, a Social Security number (SSN) is a nine-digit number issued 7 | to U.S. citizens, permanent residents, and temporary (working) residents under section 8 | 205(c)(2) of the Social Security Act, codified as 42 U.S.C. § 405(c)(2) 9 | id: usssn 10 | is_pii: 'True' 11 | langs: 12 | - en 13 | links: 14 | - type: wikipedia 15 | url: https://en.wikipedia.org/wiki/Social_Security_number 16 | regexp: ([0-9]{3})[- .]([0-9]{2})[- .]([0-9]{4}) 17 | name: US Social Security Number 18 | classification: identifier 19 | examples: [] 20 | wikidata_property: '' 21 | translations: {} 22 | -------------------------------------------------------------------------------- /data/datatypes/any/objectids/viafid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - objectids 3 | doc: 'Identifier for the Virtual International Authority File database [format: up 4 | to 22 digits]' 5 | id: viafid 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: wikipedia 11 | url: https://en.wikipedia.org/wiki/Virtual_International_Authority_File 12 | - type: wikidata 13 | url: https://www.wikidata.org/wiki/Property:P214 14 | name: VIAF ID 15 | examples: 16 | - value: '44298806' 17 | description: Foncine, Jean-Louis 18 | - value: 
'125715126' 19 | description: Cairo 20 | regexp: ([1-9]\d(?:\d{0,7}|\d{17,20})) 21 | wikidata_property: P214 22 | classification: identifier 23 | translations: {} 24 | -------------------------------------------------------------------------------- /data/datatypes/RU/geo/ru_kladr.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - RU 5 | doc: Russian unique codes assigned by Government Tax Service for every administrative 6 | address in the Russian Federation. Replaced by the FIAS government system 7 | id: kladr 8 | is_pii: 'False' 9 | langs: 10 | - ru 11 | name: KLADR code (Russian) 12 | translations: 13 | ru: 14 | doc: КЛАДР — ведомственный классификатор ФНС России, созданный для распределения 15 | территорий между налоговыми инспекциями и автоматизированной рассылки корреспонденции. 16 | Заменен системой ФИАС 17 | name: Код КЛАДР 18 | classification: identifier 19 | links: [] 20 | examples: [] 21 | wikidata_property: '' 22 | regexp: '' 23 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/unixtime.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - datetime 3 | doc: Digital timestamp defined as Unix Time. It is a system for describing a point 4 | in time. It is the number of seconds that have elapsed since the Unix epoch, excluding 5 | leap seconds. 
6 | id: unixtime 7 | is_pii: 'False' 8 | langs: 9 | - common 10 | links: 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/Unix_time 13 | name: Digital timestamp (Unix time, Epoch time, Posix time) 14 | regexp: \d{1,10} 15 | examples: 16 | - value: '1652190194' 17 | description: '2022-05-10T13:43:14+00:00' 18 | - value: '915148800' 19 | description: '1998-12-31T23:59:60.25' 20 | wikidata_property: '' 21 | translations: {} 22 | -------------------------------------------------------------------------------- /data/datatypes/any/transport/railway/uiccode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - transport 3 | doc: Identifier for a railway station in Europe, CIS countries, the Far East (China, 4 | Mongolia, Japan, Korea, Vietnam), North Africa and the Middle East 5 | id: uiccode 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: wikidata 11 | url: https://www.wikidata.org/wiki/Property:P722 12 | name: UIC station code 13 | wikidata_property: P722 14 | regexp: (10|2\d|3[0-3]|4[01249]|5\d|6[0125678]|7\d|8[0-8]|9\d).+| 15 | examples: 16 | - value: '8748100' 17 | description: Nantes Station 18 | - value: '7120100' 19 | description: Train station of Astorga 20 | classification: identifier 21 | translations: {} 22 | -------------------------------------------------------------------------------- /data/datatypes/CA/companies/cabizlic.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - companies 3 | id: cabizlic 4 | is_pii: 'False' 5 | langs: 6 | - en 7 | name: Business licence number 8 | doc: CA datatype for 'Business licence number' (cabizlic) from rules 9 | in context 'companies'. 
10 | classification: identifier 11 | country: 12 | - CA 13 | links: [] 14 | examples: 15 | - value: A1234 16 | description: Business licence number with 1 letter and 4 digits 17 | - value: BC12345 18 | description: Business licence number with 2 letters and 5 digits 19 | - value: XYZ1234567 20 | description: Business licence number with 3 letters and 7 digits 21 | wikidata_property: '' 22 | translations: {} 23 | regexp: ^[A-Z]{1,3}[0-9]{4,7}$ 24 | -------------------------------------------------------------------------------- /data/datatypes/RU/companies/ru_okpo.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - companies 3 | country: 4 | - RU 5 | doc: Russian organizations statistical code (OKPO) issued by Rosstat 6 | id: okpo 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | links: 11 | - type: wikidata 12 | url: https://www.wikidata.org/wiki/Property:P2391 13 | name: Russian organization stat code 14 | wikidata_property: P2391 15 | regexp: '\d{8}' 16 | examples: 17 | - value: '00040778' 18 | description: Gazprom 19 | - value: '03323755' 20 | description: TEK SPB 21 | translations: 22 | ru: 23 | doc: Код по Общероссийскому классификатору предприятий и организаций 24 | name: Код ОКПО 25 | classification: identifier -------------------------------------------------------------------------------- /data/datatypes/any/industry/isicrev4.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - industry 3 | langs: 4 | - common 5 | doc: Code of industry by ISIC (International Standard Industrial Classification of 6 | All Economic Activities) 7 | id: isicrev4 8 | is_pii: 'False' 9 | links: 10 | - type: other 11 | url: https://www.oecd-ilibrary.org/content/publication/8722852c-en 12 | - type: wikidata 13 | url: https://www.wikidata.org/wiki/Property:P1796 14 | name: ISIC rev 4 class code 15 | wikidata_property: P1796 16 | regexp: ([A-U]|\d{2,4}) 17 | examples: 18 | - 
value: '3030' 19 | description: aerospace industry 20 | - value: '3020' 21 | description: locomotive manufacturing 22 | classification: categorical 23 | translations: {} 24 | -------------------------------------------------------------------------------- /data/datatypes/EU/transport/eu_eninumber.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - transport 3 | country: 4 | - EU 5 | doc: An ENI number (European Number of Identification or European Vessel Identification 6 | Number) is a registration for ships capable of navigating on inland European waters. 7 | It is a unique, eight-digit identifier that is attached to a hull for its entire 8 | lifetime, independent of the vessel's current name or flag. 9 | id: eninumber 10 | is_pii: 'False' 11 | langs: 12 | - common 13 | links: 14 | - type: wikipedia 15 | url: https://en.wikipedia.org/wiki/ENI_number 16 | name: ENI Number 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/datatypes/RU/government/ru_fedgrbs.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | country: 4 | - RU 5 | doc: 3-digit code of the Russian direct government budget receiver and distributor. 
6 | Assigned to each government ministry, agency, service and some other budget distributor 7 | organizations 8 | id: fedgrbs 9 | is_pii: 'False' 10 | langs: 11 | - ru 12 | name: Russian direct government budget receiver and distributor code 13 | translations: 14 | ru: 15 | name: Код главного распорядителя бюджетных средств 16 | doc: Код из 3 цифр определяющий конкретного распределителя бюджетных средств (ГРБС) 17 | classification: categorical 18 | links: [] 19 | examples: [] 20 | wikidata_property: '' 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/datatypes/SE/persons/se_personnumer.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - SE 6 | doc: 'The personal identity number (Swedish: personnummer) is the Swedish national 7 | identification number. It is a ten digit number that is widely used in Sweden to 8 | identify individuals.' 9 | id: sepersonnummer 10 | is_pii: 'True' 11 | langs: 12 | - se 13 | links: 14 | - type: other 15 | url: https://en.wikipedia.org/wiki/Personal_identity_number_(Sweden) 16 | name: Sweden personal identity number (personnummer) 17 | classification: identifier 18 | examples: [] 19 | wikidata_property: '' 20 | translations: {} 21 | regexp: ^[0-9]{2,4}[0-9]{2}(0[1-9]|1[0-2])(0[1-9]|[1-2][0-9]|3[0-1])[-+][0-9]{4}$ 22 | -------------------------------------------------------------------------------- /data/datatypes/any/transport/maritime/imonumber.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - transport 3 | doc: The International Maritime Organization (IMO) number is a unique identifier for 4 | ships, registered ship owners and management companies. 
5 | id: imonumber 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: wikipedia 11 | url: https://en.wikipedia.org/wiki/IMO_number 12 | - type: wikidata 13 | url: https://www.wikidata.org/wiki/Property:P458 14 | name: IMO ship Number 15 | wikidata_property: P458 16 | regexp: ([1-9]\d{6}) 17 | examples: 18 | - value: '6725418' 19 | description: Queen Elizabeth 2 20 | - value: '9224752' 21 | description: TI Asia 22 | classification: identifier 23 | translations: {} 24 | -------------------------------------------------------------------------------- /data/datatypes/CA/finances/cabn.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | id: cabn 4 | is_pii: 'False' 5 | langs: 6 | - en 7 | name: Canada Business Number 8 | doc: CA datatype for 'Canada Business Number' (cabn) from rules in context 9 | 'finances'. 10 | classification: identifier 11 | country: 12 | - CA 13 | links: 14 | - type: doc 15 | url: https://www.canada.ca/en/revenue-agency/services/tax/businesses/topics/registering-your-business/business-number.html 16 | examples: 17 | - value: '123456789' 18 | description: Canada Business Number with 9 digits 19 | - value: '987654321' 20 | description: Another valid Canada Business Number with 9 digits 21 | wikidata_property: '' 22 | translations: {} 23 | regexp: ^[0-9]{9}$ 24 | -------------------------------------------------------------------------------- /data/datatypes/RU/companies/ru_ogrn_ogrnip.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - companies 3 | country: 4 | - RU 5 | doc: Russian primary government identification number assigned by Tax Service to any 6 | business entity (OGRN) in Russia and sole proprietors (OGRNIP) 7 | id: ogrn_ogrnip 8 | is_pii: 'False' 9 | langs: 10 | - ru 11 | name: Russian primary government identification number (OGRN and OGRNIP) 12 | translations: 13 | ru: 14 | doc: Основной 
государственный регистрационный номер индивидуального предпринимателя 15 | или ОГРН (основной государственный регистрационный номер) 16 | name: Код ОГРН или ОГРНИП 17 | classification: identifier 18 | links: [] 19 | examples: [] 20 | wikidata_property: '' 21 | regexp: '' 22 | -------------------------------------------------------------------------------- /data/datatypes/any/finances/tickersymbol.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | doc: Ticker symbol or stock symbol is an abbreviation used to uniquely identify publicly 4 | traded shares of a particular stock on a particular stock market. 5 | id: tickersymbol 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: wikipedia 11 | url: https://en.wikipedia.org/wiki/Ticker_symbol 12 | - type: wikidata 13 | url: https://www.wikidata.org/wiki/Property:P249 14 | name: Stock ticker symbol 15 | regexp: '[A-Z0-9.]{1,16}' 16 | wikidata_property: P249 17 | examples: 18 | - value: MSFT 19 | description: Microsoft 20 | - value: AMZN 21 | description: Amazon 22 | classification: identifier 23 | translations: {} 24 | -------------------------------------------------------------------------------- /data/datatypes/US/persons/us_ein.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - US 6 | doc: An Employer Identification Number (EIN) is also known as a Federal Tax Identification 7 | Number, and is used to identify a business entity. Generally, businesses need an 8 | EIN. 
9 | id: usein 10 | is_pii: 'True' 11 | langs: 12 | - en 13 | links: 14 | - type: other 15 | url: https://www.irs.gov/businesses/small-businesses-self-employed/employer-id-numbers 16 | - type: wikipedia 17 | url: https://en.wikipedia.org/wiki/Employer_Identification_Number 18 | regexp: '[0-9]{9}' 19 | name: US EIN (Employer Identification Number) 20 | classification: identifier 21 | examples: [] 22 | wikidata_property: '' 23 | translations: {} 24 | -------------------------------------------------------------------------------- /data/datatypes/any/datetime/birthday.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - datetime 4 | doc: A birthday is the anniversary of the birth of a person, or figuratively of an 5 | institution. Birthdays of people are celebrated in numerous cultures, often with 6 | birthday gifts, birthday cards, a birthday party, or a rite of passage. 7 | id: birthday 8 | is_pii: 'True' 9 | langs: 10 | - common 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/Birthday 14 | name: Birthday 15 | parent: 16 | type: date 17 | translations: 18 | ru: 19 | doc: 'День рождения — годовщина рождения, соответствует дате рождения человека. ' 20 | name: День рождения 21 | examples: [] 22 | wikidata_property: '' 23 | regexp: '' 24 | -------------------------------------------------------------------------------- /data/datatypes/any/geo/what3words.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | doc: What3words is a proprietary geocode system designed to identify any location 4 | with a resolution of about 3 metres (9.8 ft). It is owned by What3words Limited, 5 | based in London, England. 
6 | id: what3words 7 | is_pii: 'False' 8 | langs: 9 | - common 10 | links: 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/What3word 13 | name: What3Words 14 | wikidata_property: P1566 15 | regexp: '[1-9][0-9]{0,8}|' 16 | examples: 17 | - value: thrive.collaborating.slangy 18 | description: What3Words example 1 19 | - value: squirrels.devours.longitudes 20 | description: What3Words example 2 21 | classification: identifier 22 | translations: {} 23 | -------------------------------------------------------------------------------- /data/datatypes/GB/geo/uk_uprn.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - GB 5 | doc: The Unique Property Reference Number (UPRN) is a unique alphanumeric identifier 6 | (a geocode) for every spatial address in Great Britain and can be found in Ordnance 7 | Survey AddressBase products. 8 | id: uprn 9 | is_pii: 'False' 10 | langs: 11 | - en 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/UPRN 15 | - type: wikidata 16 | url: https://www.wikidata.org/wiki/Property:P8399 17 | wikidata_property: P8399 18 | name: UK Unique Property Reference Number (UPRN) 19 | classification: identifier 20 | examples: 21 | - value: '9051138577' 22 | description: Marischal College 23 | regexp: \d{12} 24 | translations: {} 25 | -------------------------------------------------------------------------------- /data/datatypes/RU/government/ru_kbk.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | country: 4 | - RU 5 | doc: Russian budget classification code KBK (kod byudzhetnoy klassifikacii) identifies a 6 | certain budget allocation of the federal or regional budget 7 | id: kbk 8 | is_pii: 'False' 9 | langs: 10 | - ru 11 | name: Russian budget classification code (KBK) 12 | translations: 13 | ru: 14 | doc: Код бюджетной классификации (КБК) используется в бюджетной 
системе РФ для 15 | идентификации конкретной строки расхода федерального или регионального государственного 16 | бюджета. 17 | name: Код бюджетной классификации (КБК) 18 | classification: categorical 19 | links: [] 20 | examples: [] 21 | wikidata_property: '' 22 | regexp: '' 23 | -------------------------------------------------------------------------------- /data/datatypes/any/finances/swiftcode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | doc: ISO 9362 defines a standard format of Business Identifier Codes (also known as 4 | SWIFT-BIC, BIC, SWIFT ID or SWIFT code) approved by the International Organization 5 | for Standardization (ISO). 6 | id: swiftcode 7 | is_pii: 'False' 8 | langs: 9 | - common 10 | links: 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/ISO_9362 13 | - type: wikidata 14 | url: https://www.wikidata.org/wiki/Property:P2627 15 | name: Bank SWIFT code (ISO 9362) 16 | regexp: '[A-Z]{6}[0-9A-Z]{2}([0-9A-Z]{3})?' 17 | wikidata_property: P2627 18 | examples: 19 | - value: DEUTDEFF 20 | description: Deutsche Bank 21 | classification: identifier 22 | translations: {} 23 | -------------------------------------------------------------------------------- /data/datatypes/any/software/authentihash.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - software 3 | doc: authentihash is a sha256 hash used by Microsoft to verify that the relevant sections 4 | of a PE image file have not been altered. This specific type of hash is used by 5 | Microsoft AppLocker. 
6 | id: authentihash 7 | is_pii: 'False' 8 | parent: 9 | type: sha256hash 10 | langs: 11 | - common 12 | links: 13 | - type: other 14 | url: https://developers.virustotal.com/reference/authentihash 15 | name: authentihash 16 | examples: 17 | - value: b9be0c1b4a48e7acd1bd186d31f86212794a824f0a9602242b054f9d98bd7c70 18 | description: Virus total file Twain_32.dll 19 | classification: identifier 20 | wikidata_property: '' 21 | translations: {} 22 | regexp: '' 23 | -------------------------------------------------------------------------------- /data/datatypes/US/industry/us_soc.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - industry 3 | country: 4 | - US 5 | - GB 6 | doc: Standard Occupational Classification System 7 | id: ensoc 8 | is_pii: 'False' 9 | links: 10 | - type: wikipedia 11 | url: https://en.wikipedia.org/wiki/Standard_Occupational_Classification_System 12 | - type: wikidata 13 | url: https://www.wikidata.org/wiki/Property:P919 14 | langs: 15 | - en 16 | name: Standard Occupational Classification System (US and GB) 17 | wikidata_property: P919 18 | regexp: \d{2}\-\d{6} 19 | examples: 20 | - value: 15-1242 21 | description: Database Administrators 22 | - value: 21-1023 23 | description: 'Mental Health and Substance Abuse Social Workers ' 24 | classification: categorical 25 | translations: {} 26 | -------------------------------------------------------------------------------- /data/datatypes/any/finances/iban.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | doc: The International Bank Account Number (IBAN) is an internationally agreed system 4 | of identifying bank accounts across national borders to facilitate the communication 5 | and processing of cross border transactions with a reduced risk of transcription 6 | errors. An IBAN uniquely identifies the account of a customer at a financial institution. 
7 | id: iban 8 | is_pii: 'False' 9 | langs: 10 | - common 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/International_Bank_Account_Number 14 | name: International Bank Account Number 15 | classification: identifier 16 | examples: [] 17 | wikidata_property: '' 18 | translations: {} 19 | regexp: '' 20 | -------------------------------------------------------------------------------- /data/datatypes/US/persons/us_atin.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - US 6 | doc: An ATIN is an Adoption Taxpayer Identification Number issued by the Internal 7 | Revenue Service as a temporary taxpayer identification number for the child in a 8 | domestic adoption where the adopting taxpayers do not have and/or are unable to 9 | obtain the child's Social Security Number (SSN). 10 | id: usatin 11 | is_pii: 'True' 12 | langs: 13 | - en 14 | links: 15 | - type: other 16 | url: https://www.irs.gov/individuals/adoption-taxpayer-identification-number 17 | regexp: '[0-9]{9}' 18 | name: US ATIN (Adoption Taxpayer Identification Number) 19 | classification: identifier 20 | examples: [] 21 | wikidata_property: '' 22 | translations: {} 23 | -------------------------------------------------------------------------------- /data/tools/pii/ibm-guardium.yaml: -------------------------------------------------------------------------------- 1 | id: ibmguardium 2 | category: pii 3 | name: IBM Security Guardium Analyzer 4 | doc: 'IBM Security Guardium Analyzer is a powerful service that can help you with this data security effort. Simple to use, Guardium Analyzer allows you to set up connections to your data sources - with no need to configure classification or risk scanning.' 
5 | website: https://www.ibm.com/docs/en/sga 6 | supported_types: 7 | - email 8 | - phone 9 | - bankcard 10 | - address 11 | - person_name 12 | - person_fullname 13 | - person_surname 14 | - person_firstname 15 | - benatcardid 16 | - ipaddr 17 | - ussn 18 | - uknhsnum 19 | - esnif 20 | - aumedicarenum 21 | - autfn 22 | - casin 23 | -------------------------------------------------------------------------------- /data/tools/pii/metadata-guardian.yaml: -------------------------------------------------------------------------------- 1 | id: metadata-guardian 2 | category: pii 3 | name: Metadata Guardian 4 | doc: Metadata Guardian is a Python package that provides an easy way to protect your data sources by searching its metadata. By searching with data rules, it will detect what you are looking to protect. Using Rust, it makes blazing fast multi-regex matching. 5 | website: https://github.com/fvaleye/metadata-guardian 6 | supported_types: 7 | - email 8 | - phone 9 | - ipv4 10 | - ipv6 11 | - ipaddr 12 | - postindex 13 | - address 14 | - person_firstname 15 | - person_surname 16 | - person_fullname 17 | - birthday 18 | - gender 19 | - nationality 20 | - username 21 | - password 22 | 23 | -------------------------------------------------------------------------------- /data/datatypes/DK/persons/dk_cpr.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | country: 5 | - DK 6 | doc: 'The Danish Personal Identification number is a national identification number, 7 | which is part of the personal information stored in the Civil Registration System 8 | (Danish: Det Centrale Personregister, Greenlandic: Inunnik Qitiusumik Nalunaarsuiffik).' 
9 | id: dkcprnum 10 | is_pii: 'True' 11 | langs: 12 | - da 13 | links: 14 | - type: wikipedia 15 | url: https://en.wikipedia.org/wiki/Personal_identification_number_(Denmark) 16 | regexp: /^(0[1-9]|[12]\d|3[01])(0[1-9]|1[0-2])\d{2}[-]?\d{4}$ 17 | name: CPR-Number, Personal identification number (Denmark) 18 | classification: identifier 19 | examples: [] 20 | wikidata_property: '' 21 | translations: {} 22 | -------------------------------------------------------------------------------- /data/datatypes/US/finances/us_cusip.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | country: 4 | - US 5 | doc: A CUSIP is a nine-digit numeric (e.g. 037833100 for Apple) or nine-character 6 | alphanumeric (e.g. 38259P508 for Google) code that identifies a North American financial 7 | security for the purposes of facilitating clearing and settlement of trades 8 | id: cusip 9 | is_pii: 'False' 10 | langs: 11 | - common 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/CUSIP 15 | name: CUSIP code 16 | regexp: ^[0-9]{3}[a-zA-Z0-9]{2}[a-zA-Z0-9*@#]{3}[0-9]$ 17 | examples: 18 | - value: 037833100 19 | description: Apple 20 | - value: 38259P508 21 | description: Google 22 | classification: identifier 23 | wikidata_property: '' 24 | translations: {} 25 | -------------------------------------------------------------------------------- /data/datatypes/US/geo/us_fips64.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - US 5 | doc: Identifier for US entities (mostly counties) per "Federal Information Processing 6 | Series" (FIPS 6-4), used for counties, Puerto Rico zona urbana, Metropolitan Statistical 7 | Areas (MSA) and Combined Statistical Areas (CSA) in the United States. 
8 | id: fips64 9 | is_pii: 'False' 10 | links: 11 | - type: wikidata 12 | url: https://www.wikidata.org/wiki/Property:P882 13 | langs: 14 | - en 15 | name: FIPS 6-4 numeric code (US counties) 16 | wikidata_property: P882 17 | regexp: \d{5} 18 | examples: 19 | - value: '24003' 20 | description: Anne Arundel County 21 | - value: '01001' 22 | description: Autauga County 23 | classification: categorical 24 | translations: {} 25 | -------------------------------------------------------------------------------- /data/datatypes/any/cryptocurrency/p2shaddr.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - cryptocurrency 3 | doc: 'Pay to script hash (P2SH) transactions were standardised in BIP 16. They allow 4 | transactions to be sent to a script hash (address starting with 3) instead of a 5 | public key hash (addresses starting with 1). To spend bitcoins sent via P2SH, the 6 | recipient must provide a script matching the script hash and data which makes the 7 | script evaluate to true. ' 8 | id: p2shaddr 9 | is_pii: 'False' 10 | langs: 11 | - common 12 | links: 13 | - type: other 14 | url: https://allprivatekeys.com/bitcoin-address-format 15 | name: Script hash (P2SH address) 16 | classification: identifier 17 | examples: [] 18 | wikidata_property: '' 19 | translations: {} 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/any/transport/air/iataairlinecode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - transport 3 | doc: IATA airline designators, sometimes called IATA reservation codes, are two-character 4 | codes assigned by the International Air Transport Association (IATA) to the world's 5 | airlines. 
6 | id: iataairlinecode 7 | langs: 8 | - common 9 | links: 10 | - type: wikipedia 11 | url: https://en.wikipedia.org/wiki/Airline_codes 12 | - type: wikidata 13 | url: https://www.wikidata.org/wiki/Property:P229 14 | name: IATA airline designator (airline code) 15 | semantic_type: airlinecode 16 | wikidata_property: P229 17 | regexp: '[A-Z0-9]{2}' 18 | examples: 19 | - value: MH 20 | description: Malaysia Airlines 21 | - value: QW 22 | description: Blue Wings 23 | translations: {} 24 | -------------------------------------------------------------------------------- /data/datatypes/FR/geo/fr_inseecode.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | country: 4 | - FR 5 | doc: 'The INSEE code is a numerical indexing code used by the French National Institute 6 | for Statistics and Economic Studies (INSEE) to identify various entities, including 7 | communes and départements. ' 8 | id: inseecode 9 | is_pii: 'False' 10 | langs: 11 | - fr 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/INSEE_code 15 | - type: wikidata 16 | url: https://www.wikidata.org/wiki/Property:P374 17 | wikidata_property: P374 18 | regexp: \d[0-9AB][0-9P]\d\d 19 | name: INSEE code 20 | examples: 21 | - value: '75056' 22 | description: Paris 23 | - value: '48095' 24 | description: Mende 25 | classification: identifier 26 | translations: {} 27 | -------------------------------------------------------------------------------- /data/datatypes/any/geo/unm49.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | doc: UN M49 or the Standard Country or Area Codes for Statistical Use (Series M, No. 4 | 49) is a standard for area codes used by the United Nations for statistical purposes, 5 | developed and maintained by the United Nations Statistics Division. 
6 | id: unm49 7 | is_pii: 'False' 8 | langs: 9 | - en 10 | links: 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/UN_M49 13 | - type: wikidata 14 | url: https://www.wikidata.org/wiki/Property:P2082 15 | name: UN M49 16 | wikidata_property: P2082 17 | regexp: '[0-9]{3}' 18 | examples: 19 | - value: '005' 20 | description: South America 21 | - value: '554' 22 | description: New Zealand 23 | classification: categorical 24 | translations: {} 25 | -------------------------------------------------------------------------------- /data/datatypes/any/geo/city.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | - pii 4 | - common 5 | doc: A city is a large human settlement. It can be defined as a permanent and densely 6 | settled place with administratively defined boundaries whose members work primarily 7 | on non-agricultural tasks. 8 | id: city 9 | is_pii: 'True' 10 | langs: 11 | - common 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/City 15 | - type: schema.org 16 | url: https://schema.org/City 17 | name: City 18 | patterns: 19 | - ruscity 20 | - uscity 21 | translations: 22 | ru: 23 | doc: Город — крупный населённый пункт, жители которого заняты, как правило, не 24 | сельским хозяйством. 25 | name: Город 26 | examples: [] 27 | wikidata_property: '' 28 | regexp: '' 29 | -------------------------------------------------------------------------------- /data/datatypes/RU/medical/ru_medmnn.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - medical 3 | country: 4 | - RU 5 | doc: An international nonproprietary name (INN) is an official generic and non-proprietary 6 | name given to a pharmaceutical drug or an active ingredient. 
(Russian) 7 | id: rumedmnn 8 | is_pii: 'False' 9 | langs: 10 | - ru 11 | name: International nonproprietary name (Russia) 12 | translations: 13 | ru: 14 | doc: Международное непатентованное наименование (МНН) — уникальное наименование 15 | действующего вещества лекарственного средства, рекомендованное Всемирной организацией 16 | здравоохранения (ВОЗ). 17 | name: Международное непатентованное наименование 18 | classification: identifier 19 | links: [] 20 | examples: [] 21 | wikidata_property: '' 22 | regexp: '' 23 | -------------------------------------------------------------------------------- /data/datatypes/any/geo/iso6709.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | doc: Geographic point as string by ISO 6709, ISO 6709, Standard representation of 4 | geographic point location by coordinates, is the international standard for representation 5 | of latitude, longitude and altitude for geographic point locations. 6 | id: geopoint 7 | is_pii: 'False' 8 | langs: 9 | - common 10 | links: 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/ISO_6709 13 | name: ISO 6709 geopoint 14 | examples: 15 | - value: 50°40′46.461″N 95°48′26.533″W 123.45m 16 | description: Geopoint example 1 17 | - value: 50°03′46.461″S 125°48′26.533″E 978.90m 18 | description: Geopoint example 2 19 | classification: identifier 20 | wikidata_property: '' 21 | translations: {} 22 | regexp: '' 23 | -------------------------------------------------------------------------------- /data/datatypes/any/chemistry/unclass.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - chemistry 3 | doc: UN hazard classification code 4 | id: unclass 5 | is_pii: 'False' 6 | langs: 7 | - common 8 | links: 9 | - type: wikidata 10 | url: https://www.wikidata.org/wiki/Property:P874 11 | name: UN Class 12 | wikidata_property: P874 13 | regexp: '(1\.[1-6]|[24]\.[123]|[56]\.[12]|[3789])' 
14 | examples: 15 | - value: '6.1' 16 | description: sodium azide 17 | translations: 18 | ru: 19 | doc: 'Номер ООН, Номер UN или UN-идентификатор — четырёхзначное число, позволяющее определить опасность вещества или изделия (такого, например, как взрывчатое вещество, легковоспламеняющаяся жидкость, ядовитое вещество и др.)' 20 | name: Код класса опасности ООН 21 | classification: categorical -------------------------------------------------------------------------------- /data/datatypes/any/transport/air/airport.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | - transport 4 | doc: 'A location identifier is a symbolic representation for the name and the location 5 | of an airport, navigation aid, or weather station, and is used for staffed air traffic 6 | control facilities in air traffic control, telecommunications, computer programming, 7 | weather reports, and related services. ' 8 | id: airport 9 | is_pii: 'False' 10 | langs: 11 | - common 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/Location_identifier 15 | name: Airport location identifier 16 | patterns: 17 | - icaoairportcode 18 | - iataairportcode 19 | - faalid 20 | classification: identifier 21 | examples: [] 22 | wikidata_property: '' 23 | translations: {} 24 | regexp: '' 25 | -------------------------------------------------------------------------------- /data/datatypes/any/companies/opencorporatesid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - companies 3 | doc: 'Identifier for a corporation, in the OpenCorporates database. 
Format: country 4 | prefix, optional subnational entity abbreviation, "/", alphanumeric id' 5 | is_pii: 'False' 6 | id: opencorporatesid 7 | langs: 8 | - common 9 | links: 10 | - type: wikipedia 11 | url: https://en.wikipedia.org/wiki/OpenCorporates 12 | - type: wikidata 13 | url: https://www.wikidata.org/wiki/Property:P1320 14 | name: Open Corporates ID 15 | wikidata_property: P1320 16 | regexp: '[a-z]{2}(_[a-z]{2})?/[a-zA-ZÖÜÄ0-9\-_]*[0-9A-Za-f]' 17 | examples: 18 | - value: gb/SC129785 19 | description: Devro 20 | - value: mx/2682534 21 | description: Mexican Red Cross 22 | classification: identifier 23 | translations: {} 24 | -------------------------------------------------------------------------------- /data/datatypes/any/cryptocurrency/p2pkhaddr.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - cryptocurrency 3 | doc: 'A Bitcoin address is only a hash, so the sender can''t provide a full public 4 | key in scriptPubKey. When redeeming coins that have been sent to a Bitcoin address, 5 | the recipient provides both the signature and the public key. The script verifies 6 | that the provided public key does hash to the hash in scriptPubKey, and then it 7 | also checks the signature against the public key.
' 8 | id: p2pkhaddr 9 | is_pii: 'False' 10 | langs: 11 | - common 12 | links: 13 | - type: other 14 | url: https://allprivatekeys.com/bitcoin-address-format 15 | name: Pubkey hash (P2PKH address) 16 | classification: identifier 17 | examples: [] 18 | wikidata_property: '' 19 | translations: {} 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/RU/companies/ru_okfs.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - common 3 | country: 4 | - RU 5 | doc: Russian classifier of forms of ownership, OKFS 6 | id: okfs 7 | is_pii: 'False' 8 | langs: 9 | - ru 10 | links: 11 | - type: other 12 | url: https://ru.wikipedia.org/wiki/Общероссийские_классификаторы_технико-экономической_и_социальной_информации 13 | name: Russian form of ownership code (OKFS) 14 | regexp: \d{2} 15 | examples: 16 | - value: '12' 17 | description: Федеральная собственность 18 | - value: '22' 19 | description: Собственность иностранных государств 20 | translations: 21 | ru: 22 | doc: ОКФС — это общероссийский классификатор форм собственности, входящий в Единую 23 | систему кодирования РФ 24 | name: Код ОКФС 25 | classification: categorical 26 | wikidata_property: '' 27 | -------------------------------------------------------------------------------- /data/datatypes/any/cryptocurrency/bip32addr.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - cryptocurrency 3 | doc: The BIP-32 mainly describes the way of building the hierarchical deterministic 4 | wallet basing on the BIP-39 features. What does it mean? BIP-32 is exactly explaining 5 | how the bitcoin master private key and the master chain code is being created from 6 | delivered by BIP-39 bitcoin seed. The bitcoin seed generated by BIP-39 is the base 7 | for generating deterministic and hierarchical deterministic wallets. 
8 | id: bip32addr 9 | is_pii: 'False' 10 | langs: 11 | - common 12 | links: 13 | - type: other 14 | url: https://allprivatekeys.com/bitcoin-address-format 15 | name: BIP-32 key 16 | classification: identifier 17 | examples: [] 18 | wikidata_property: '' 19 | translations: {} 20 | regexp: '' 21 | -------------------------------------------------------------------------------- /data/datatypes/any/internet/fqdn.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - internet 3 | doc: A domain name is an identification string that defines a realm of administrative 4 | autonomy, authority or control within the Internet. 5 | id: fqdn 6 | is_pii: 'False' 7 | langs: 8 | - common 9 | links: 10 | - type: wikipedia 11 | url: https://en.wikipedia.org/wiki/Fully_qualified_domain_name 12 | name: Fully qualified domain name (FQDN) 13 | regexp: ^(?!:\/\/)(?=.{1,255}$)((.{1,63}\.){1,127}(?![0-9]*$)[a-z0-9-]+\.?)$ 14 | examples: 15 | - value: ec2-35-160-210-253.us-west-2.compute.amazonaws.com 16 | description: Amazon Web Services EC2 server 17 | - value: xn--kxae4bafwg.xn--pxaix.gr 18 | description: Greek language national domain name 19 | classification: identifier 20 | wikidata_property: '' 21 | translations: {} 22 | -------------------------------------------------------------------------------- /data/datatypes/EU/companies/eu_vatin.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - companies 3 | country: 4 | - EU 5 | doc: A value-added tax identification number or VAT identification number (VATIN) 6 | is an identifier used in many countries, including the countries of the European 7 | Union, for value-added tax purposes.
8 | id: vatin 9 | is_pii: 'False' 10 | langs: 11 | - en 12 | links: 13 | - type: wikipedia 14 | url: https://en.wikipedia.org/wiki/VAT_identification_number 15 | - type: wikidata 16 | url: https://www.wikidata.org/wiki/Property:P3608 17 | name: EU VAT ID 18 | wikidata_property: P3608 19 | regexp: '[A-Z]{2}[A-Z\d]+' 20 | examples: 21 | - value: FI15243611 22 | description: Sanoma Corporation 23 | - value: BG200356710 24 | description: Ontotext 25 | classification: identifier 26 | translations: {} 27 | -------------------------------------------------------------------------------- /data/datatypes/GB/companies/uk_companyhouseid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - companies 3 | doc: Numeric identifier for company registered with Companies House in the United 4 | Kingdom 5 | is_pii: 'False' 6 | langs: 7 | - en 8 | country: 9 | - GB 10 | links: 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/Companies_House 13 | - type: wikidata 14 | url: https://www.wikidata.org/wiki/Property:P2622 15 | id: companyhouseid 16 | name: Companies House company ID (UK) 17 | wikidata_property: P2622 18 | regexp: (AC|FC|GE|GN|GS|IC|IP|LP|NA|NF|NI|NL|NO|NP|NR|NZ|OC|R|RC|SA|SC|SF|SI|SL|SO|SP|SR|SZ|ZC|[0-9]{2})[0-9RS]{6} 19 | examples: 20 | - value: '02050843' 21 | description: Aardman Animations 22 | - value: '00185647' 23 | description: 'Sainsbury''s' 24 | classification: identifier 25 | translations: {} 26 | -------------------------------------------------------------------------------- /data/datatypes/CA/finances/cagst.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - finances 3 | id: cagst 4 | is_pii: 'False' 5 | langs: 6 | - en 7 | name: GST/HST account number 8 | doc: CA datatype for 'GST/HST account number' (cagst) from rules in 9 | context 'finances'.
10 | classification: identifier 11 | country: 12 | - CA 13 | links: 14 | - type: doc 15 | url: https://www.canada.ca/en/revenue-agency/services/tax/businesses/topics/gst-hst-businesses/registration/gst-hst-account-number.html 16 | examples: 17 | - value: '123456789RT0001' 18 | description: GST/HST account number with 9-digit Business Number, 'RT' program identifier and 4-digit reference 19 | - value: '987654321RT0002' 20 | description: Another valid GST/HST account number format 21 | wikidata_property: '' 22 | translations: {} 23 | regexp: ^[0-9]{9}RT[0-9]{4}$ 24 | -------------------------------------------------------------------------------- /data/datatypes/US/persons/us_dea_certificate.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - pii 3 | - persons 4 | - medical 5 | country: 6 | - US 7 | doc: A DEA number (DEA Registration Number) is an identifier assigned to a health 8 | care provider (such as a physician, physician assistant, nurse practitioner, optometrist, 9 | podiatrist, dentist, or veterinarian) by the United States Drug Enforcement Administration 10 | allowing them to write prescriptions for controlled substances. 11 | id: usdeanumber 12 | is_pii: 'True' 13 | langs: 14 | - en 15 | links: 16 | - type: wikipedia 17 | url: https://en.wikipedia.org/wiki/DEA_number 18 | regexp: '[a-zA-Z]{2}\d{7}|[a-zA-Z]{1}9\d{7}' 19 | name: USA DEA Registration (certificate) Number 20 | classification: identifier 21 | examples: [] 22 | wikidata_property: '' 23 | translations: {} 24 | -------------------------------------------------------------------------------- /data/datatypes/any/transport/air/flightnumber.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - transport 3 | doc: In the aviation industry, a flight number or flight designator is a code for 4 | an airline service consisting of a two-character airline designator and a 1 to 4 digit 5 | number.
6 | id: flightnumber 7 | is_pii: 'False' 8 | langs: 9 | - common 10 | links: 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/Flight_number 13 | - type: wikidata 14 | url: https://www.wikidata.org/wiki/Property:P3090 15 | name: Flight number 16 | wikidata_property: P3090 17 | regexp: ([A-Z]{2,3}|[A-Z][0-9]|[0-9][A-Z])\d{1,4}[A-Z]?|\d{1,4} 18 | examples: 19 | - value: EK521 20 | description: Emirates Flight 521 21 | - value: 4U9525 22 | description: Germanwings Flight 9525 23 | classification: identifier 24 | translations: {} 25 | -------------------------------------------------------------------------------- /data/tools/other/googledatastudio.yaml: -------------------------------------------------------------------------------- 1 | id: googledatastudio 2 | category: other 3 | name: Google Data Studio 4 | doc: 'With Data Studio, you can easily report on data from a wide variety of sources, without programming. In just a few moments, you can connect to data sets such as: Databases, including BigQuery, MySQL, and PostgreSQL. Google Marketing Platform products, including Google Ads, Analytics, Display & Video 360, Search Ads 360.'
5 | website: https://developers.google.com/datastudio/connector/semantics#semantic-type-detection 6 | supported_types: 7 | - geopoint 8 | - country 9 | - city 10 | - datetime 11 | - time 12 | - address 13 | - person_name 14 | - year 15 | - date 16 | - month 17 | - day 18 | - dayofweek 19 | - boolean 20 | - url 21 | 22 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /data/datatypes/any/transport/air/aircraftnumber.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - objectids 3 | - transport 4 | doc: An aircraft registration, alternatively called a tail number, is a code unique 5 | to a single aircraft, required by international convention to be marked on the exterior 6 | of every civil aircraft. 7 | id: aircraftnumber 8 | is_pii: 'False' 9 | langs: 10 | - common 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/Aircraft_registration 14 | - type: wikidata 15 | url: https://www.wikidata.org/wiki/Property:P426 16 | name: Aircraft/tail number 17 | wikidata_property: P426 18 | regexp: '[A-Z0-9- ]+' 19 | examples: 20 | - value: N-X-211 21 | description: Spirit of St. 
Louis 22 | - value: N736PA 23 | description: Clipper Victor 24 | classification: identifier 25 | translations: {} 26 | -------------------------------------------------------------------------------- /data/datatypes/US/government/us_piid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - government 3 | country: 4 | - US 5 | doc: The PIID consists of a combination of thirteen to seventeen alpha and/or numeric 6 | characters sequenced to convey certain information 7 | id: uspiid 8 | is_pii: 'False' 9 | langs: 10 | - en 11 | links: 12 | - type: other 13 | url: https://www.acquisition.gov/sites/default/files/current/far/compiled_html/subpart_4.16.html 14 | - type: other 15 | url: https://www.fpds.gov/help/Procurement_Instrument_Identifier.htm 16 | name: Procurement Instrument Identifier (PIID) 17 | regexp: '[0-9A-Z]{13,17}' 18 | examples: 19 | - value: 19RS5022F0185 20 | description: PIID 19RS5022F0185 21 | - value: 19RS5021D0001 22 | description: PIID 19RS5021D0001 23 | classification: identifier 24 | wikidata_property: '' 25 | translations: {} 26 | -------------------------------------------------------------------------------- /data/datatypes/any/chemistry/pubchemid.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - chemistry 3 | doc: PubChem is a database of chemical molecules and their activities against biological 4 | assays. The system is maintained by the National Center for Biotechnology Information 5 | (NCBI), a component of the National Library of Medicine, which is part of the United 6 | States National Institutes of Health (NIH). 
7 | id: pubchemid 8 | is_pii: 'False' 9 | langs: 10 | - common 11 | links: 12 | - type: wikipedia 13 | url: https://en.wikipedia.org/wiki/PubChem 14 | - type: wikidata 15 | url: https://www.wikidata.org/wiki/Property:P2153 16 | name: PubChem Substance ID (SID) 17 | wikidata_property: P2153 18 | regexp: '[1-9]\d*' 19 | examples: 20 | - value: '135307179' 21 | description: zeolite Y 22 | classification: identifier 23 | translations: {} 24 | -------------------------------------------------------------------------------- /data/datatypes/any/geo/countrycode_alpha3.yaml: -------------------------------------------------------------------------------- 1 | categories: 2 | - geo 3 | doc: ISO 3166-1 alpha-3 codes are three-letter country codes defined in ISO 3166-1, 4 | part of the ISO 3166 standard published by the International Organization for Standardization 5 | (ISO), to represent countries, dependent territories, and special areas of geographical 6 | interest. 7 | id: countrycode_alpha3 8 | langs: 9 | - common 10 | links: 11 | - type: wikipedia 12 | url: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3 13 | - type: wikidata 14 | url: https://www.wikidata.org/wiki/Property:P298 15 | name: ISO 3166-1 Alpha3 country code 16 | semantic_type: country 17 | regexp: '[A-Z]{3}' 18 | wikidata_property: P298 19 | examples: 20 | - value: RUS 21 | description: Russia 22 | - value: DEU 23 | description: Germany 24 | translations: {} 25 | --------------------------------------------------------------------------------