├── data
│   ├── .gitkeep
│   └── Skolenhetsregistret.xlsx
├── tmp
│   └── .gitignore
├── requirements
│   ├── python3.txt
│   └── python2.txt
├── .gitignore
├── Makefile
├── settings.py
├── list_datasets.py
├── download_skolenhetsregistret.py
├── download_surveys.py
├── download_siris.py
├── README.md
└── download_artisan.py

--------------------------------------------------------------------------------
/data/.gitkeep:
--------------------------------------------------------------------------------
*

--------------------------------------------------------------------------------
/tmp/.gitignore:
--------------------------------------------------------------------------------
*

--------------------------------------------------------------------------------
/requirements/python3.txt:
--------------------------------------------------------------------------------
requests==2.23.0
boto3==1.12.36
# Needed by download_artisan.py (no versions were pinned in the original):
selenium
beautifulsoup4

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.pyc
*.sqlite
data/*
!data/.gitkeep
*.csv#
*.log

--------------------------------------------------------------------------------
/requirements/python2.txt:
--------------------------------------------------------------------------------
requests==2.22.0
requests-cache==0.5.2
siris-scraper==0.2.2

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
deploy: # upload to aws s3
	aws s3 cp data s3://skolverket-statistik/ --recursive

--------------------------------------------------------------------------------
/data/Skolenhetsregistret.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jplusplus/skolstatistik/HEAD/data/Skolenhetsregistret.xlsx

--------------------------------------------------------------------------------
/settings.py:
--------------------------------------------------------------------------------
DATA_DIR = "data"

FORMATS = [
    ("XML", "xml"),
    ("EXCEL", "xlsx"),
    ("CSV", "csv")
]

S3_URL = "https://skolverket-statistik.s3.eu-north-1.amazonaws.com"
S3_BUCKET = "skolverket-statistik"
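The scripts below read their configuration from `settings.py`, and the upload scripts assume that the bucket named there already exists and allows public reads. A minimal setup sketch (a hypothetical step, not part of the repo; the region is the one implied by `S3_URL`):

```python
import boto3

from settings import S3_BUCKET

# eu-north-1 is the region implied by S3_URL in settings.py
s3 = boto3.client("s3", region_name="eu-north-1")
s3.create_bucket(
    Bucket=S3_BUCKET,
    CreateBucketConfiguration={"LocationConstraint": "eu-north-1"},
)
```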
--------------------------------------------------------------------------------
/list_datasets.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python2
# encoding: utf-8
"""Create a list/table of all downloaded datasets.
"""

import os
from collections import OrderedDict
import csv
from settings import DATA_DIR, S3_URL

all_files = []

for subdir, dirs, files in os.walk(DATA_DIR):
    for file in files:
        file_path = os.path.join(subdir, file)
        try:
            # e.g. data/siris/<skolnivå>/<id>-<label>/<format>/<år>-<uttag>.csv
            _, db, skolniva, dataset, fmt, filename = file_path.split("/")
            year, uttag = filename.split(".")[0].split("-")
        except ValueError:
            # Not a downloaded dataset file
            continue

        url = S3_URL + file_path.replace(DATA_DIR, "")
        all_files.append(OrderedDict([
            ("databas", db),
            ("skolnivå", skolniva),
            ("dataset", dataset),
            ("år", year),
            ("uttag", uttag),
            ("format", fmt),
            ("url", url),
        ]))

with open('datasets.csv', 'w') as outfile:
    fp = csv.DictWriter(outfile, all_files[0].keys())
    fp.writeheader()
    fp.writerows(all_files)

--------------------------------------------------------------------------------
/download_skolenhetsregistret.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
"""Fetch all school data from Skolenhetsregistret, at various dates.
"""
import requests
from csv import DictWriter
from tempfile import NamedTemporaryFile
from settings import DATA_DIR, S3_BUCKET
import boto3
import json


BASE = "https://api.scb.se/UF0109/v2/skolenhetsregister/sv/skolenhet"
DATES = [
    "20200701", "20200401", "20200101",
    "20191001", "20190701", "20190401", "20190101",
    "20181001", "20180701", "20180401", "20180101",
    "20171001", "20170701", "20170401", "20170101",
    "20161001", "20160701", "20160401", "20160101",
    "20151001", "20150701", "20150401", "20150101",
    "20141001", "20140701", "20140401", "20140101",
    "20131001", "20130701", "20130401", "20130101",
]

# Fetch the list of schools
d = requests.get(BASE).json()
print(f"Got {len(d['Skolenheter'])} schools")
print(d["Fotnot"])

with open(f"{DATA_DIR}/skolenheter.csv", "w") as file_:
    writer = DictWriter(file_, fieldnames=[
        "Skolenhetskod",
        "Skolenhetsnamn",
        "Kommunkod",
        "PeOrgNr"
    ])
    writer.writeheader()
    writer.writerows(d["Skolenheter"])

# Fetch data for each school, at numerous points in time
session = boto3.Session()  # profile_name="XXX"
s3_client = session.client("s3")
for school in d["Skolenheter"]:
    id_ = school['Skolenhetskod']
    print(f"Downloading {school['Skolenhetsnamn']} ({id_})")
    for date in DATES:
        endpoint = f"{BASE}/{id_}/{date}"
        # Use a separate name here; rebinding `d` would shadow the school list
        detail = requests.get(endpoint).json()
        with NamedTemporaryFile(mode='wt') as tmp:
            tmp.write(json.dumps(detail))
            tmp.seek(0)  # also flushes the buffer to disk before upload
            s3_client.upload_file(
                tmp.name,
                S3_BUCKET,
                f"skolenhet/{id_}/{date}.json",
                ExtraArgs={'ACL': "public-read", 'CacheControl': "no-cache"},
            )
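To spot-check the uploads, any of the per-date objects can be fetched back over plain HTTP. A minimal sketch (the school unit ID and date are the example given in the README):

```python
import requests

from settings import S3_URL

# School unit 10110104 on 2020-01-01 is the example object from the README
url = S3_URL + "/skolenhet/10110104/20200101.json"
r = requests.get(url)
r.raise_for_status()
print(r.json())
```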
--------------------------------------------------------------------------------
/download_surveys.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
"""Fetch all Skolinspektionen surveys from the Skolverket API.
"""
import requests
from tempfile import NamedTemporaryFile
from settings import S3_BUCKET
import boto3
import json
import os
from csv import DictWriter
from urllib.parse import quote


BASE = "https://api.skolverket.se/planned-educations/"

next_page = BASE + "school-units?size=100"
codes = []

session = boto3.Session()  # profile_name="XXX"
s3_client = session.client("s3")

# Page through the full list of school units
while next_page:
    print("fetching school units from", next_page)
    r = requests.get(next_page)
    assert r.status_code == 200
    data = r.json()
    if data["_embedded"]["listedSchoolUnits"]:
        codes += [s["code"]
                  for s in data["_embedded"]["listedSchoolUnits"]]
    if "next" in data["_links"]:
        next_page = data["_links"]["next"]["href"]
    else:
        next_page = None

for c in codes:
    print(f"Fetching surveys for school unit {c}")
    r = requests.get(BASE + f"school-units/{c}/surveys")
    assert r.status_code == 200
    data = r.json()
    for group in data["_links"].keys():
        if group == "self":
            continue

        href = data["_links"][group]["href"]
        r = requests.get(href)
        assert r.status_code == 200
        d = r.json()

        s3_key_name = f"survey/{c}/{group}.json"
        with NamedTemporaryFile(mode='wt') as tmp:
            tmp.write(json.dumps(d))
            tmp.seek(0)
            s3_client.upload_file(
                tmp.name,
                S3_BUCKET,
                s3_key_name,
                ExtraArgs={
                    'ACL': "public-read",
                    'CacheControl': "no-cache"
                },
            )

        s3_base = f"https://{S3_BUCKET}.s3.eu-north-1.amazonaws.com/"
        s3_path = s3_base + quote(s3_key_name)
        # Write the header once, when the index file is first created
        write_header = not os.path.exists("swagger.csv")
        with open("swagger.csv", "a") as file_:
            writer = DictWriter(file_, fieldnames=[
                "school",
                "survey",
                "path",
            ])
            if write_header:
                writer.writeheader()
            writer.writerow({
                'school': c,
                'survey': group,
                'path': s3_path,
            })
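The `swagger.csv` index written above can later be used to pull the stored surveys back down. A minimal sketch (assuming the file exists, with the header written by the script):

```python
import requests
from csv import DictReader

with open("swagger.csv") as file_:
    for row in DictReader(file_):
        survey = requests.get(row["path"]).json()
        print(row["school"], row["survey"], list(survey.keys()))
```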
--------------------------------------------------------------------------------
/download_siris.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python2
# encoding: utf-8
"""Download all datasets from Siris locally.
"""
import os
import requests
import requests_cache
from siris.scraper import SirisScraper
from settings import DATA_DIR, FORMATS
requests_cache.install_cache('demo_cache')

DOWNLOAD_DIR = os.path.join(DATA_DIR, "siris")

# Init scraper
scraper = SirisScraper()

# Build an index of datasets that are already on disk
DOWNLOADED_DATASETS = []
for subdir, dirs, files in os.walk(DATA_DIR):
    for file in files:
        file_path = os.path.join(subdir, file)
        try:
            _, db, skolniva, dataset, fmt, filename = file_path.split("/")
            # Dataset labels may themselves contain hyphens; split only once
            dataset_id, _ = dataset.split("-", 1)
            DOWNLOADED_DATASETS.append(dataset_id)
        except ValueError:
            continue

# Loop over school forms (fritidshem, grundskola, ...)
for verksamhetsform in scraper.items:
    print(u"VERKSAMHETSFORM: {}".format(verksamhetsform.label))

    for dataset in verksamhetsform.items:
        # Uncomment to skip datasets that are already downloaded:
        # if dataset.id in DOWNLOADED_DATASETS:
        #     print(u"Already downloaded {} ({})".format(
        #         dataset.label, dataset.id))
        #     continue

        for period, _ in dataset.periods:
            print(u"- " + period)
            uttag = dataset.get_uttag(period)
            if len(uttag) == 0:
                uttag = [("1", None)]

            for uttag_id, _ in uttag:
                base_url = dataset.get_xml_url(period, uttag_id)
                for url_fmt, file_fmt in FORMATS:
                    file_url = base_url.replace("XML", url_fmt)
                    print(u"Downloading {}".format(file_url))

                    r = requests.get(file_url)

                    outdir = os.path.join(
                        DOWNLOAD_DIR,
                        verksamhetsform.label,
                        dataset.id + "-" + dataset.label.replace("/", " och "),
                        url_fmt)
                    if not os.path.exists(outdir):
                        os.makedirs(outdir)

                    filename = "{}-{}.{}".format(period, uttag_id, file_fmt)
                    filepath = os.path.join(outdir, filename)

                    with open(filepath, 'wb') as f:
                        f.write(r.content)

                    print(u"Storing {}".format(filepath))

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Operation: Rädda skolstatistiken

On September 1, 2020, Skolverket (the Swedish National Agency for Education) is forced to unpublish all school statistics below the national level. After that date, in other words, we can no longer know how teacher certification levels differ between different parts of the country, or what grades students average at different schools.

To save what can be saved, we are trying to download as much of the currently available statistics as possible, and to keep it publicly available to researchers, journalists, politicians and the general public after September 1, 2020 as well.

This repo collects [_links_](https://github.com/jplusplus/skolstatistik/blob/master/datasets.csv) to the downloaded datasets, together with the Python code to redo the download yourself (or to see how we did it). The files are hosted on Amazon's cloud service AWS S3.

## Datasets

### From the Siris database

From the [Siris search interface](https://siris.skolverket.se/siris/ris.export_stat.form) we have fetched all statistics aggregated per school, municipality or school authority (huvudman). One file contains the figures for all authorities, schools or municipalities for a given school year, for example teacher certification per municipality in compulsory school for a certain year.

The files come in three formats: CSV, Excel and XML. They are saved to a folder structure following this pattern:

- `siris/[skolnivå]/[statistikområde]/[CSV|EXCEL|XML]/[år]-[uttag].[csv|xlsx|xml]`

We only fetch files that aggregate statistics for several schools, authorities or municipalities into one file. Compilations of the type "all statistics for Bjuv municipality" are in other words missing, but they can be recreated from the data here.

You will find a list of all datasets from Siris in [datasets.csv](./datasets.csv).
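To locate files programmatically, the index can be filtered on its columns. A minimal sketch (the column names are the ones written by `list_datasets.py`; `Grundskolan` is an assumed example value for the school level):

```python
from csv import DictReader

with open("datasets.csv") as file_:
    rows = [r for r in DictReader(file_)
            if r["format"] == "CSV" and r["skolnivå"] == "Grundskolan"]

for r in rows[:5]:
    print(r["år"], r["dataset"], r["url"])
```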
### From Jämförelsetal (Artisan)

From http://www.jmftal.artisan.se/ we have fetched all data for all municipalities, counties and municipality groups in CSV format.

Links to the data can be found in [artisan.csv](./artisan.csv).

### From Skolenhetsregistret

The overview Excel sheet can be found in the [/data](/data) folder.

Detailed data about each school unit, as of the first of January, April, July and October each year, is available in JSON files following this pattern:
`https://skolverket-statistik.s3.eu-north-1.amazonaws.com/skolenhet/{SCHOOL-UNIT-ID}/{YYYYMMDD}.json`, for example [https://skolverket-statistik.s3.eu-north-1.amazonaws.com/skolenhet/10110104/20200101.json](https://skolverket-statistik.s3.eu-north-1.amazonaws.com/skolenhet/10110104/20200101.json)

### Skolverket's API (Swagger)

All surveys of students and guardians (Skolinspektionen's school survey), stored as JSON files on Amazon AWS S3.

Links to the files can be found in [swagger.csv](./swagger.csv).

## Download scripts

The scripts we used to download the data are included for reference. They of course no longer work, since the data sources are no longer available.

### `download_siris.py`
First install the required packages with `pip install -r requirements/python2.txt`.

`download_siris.py` must be run with Python 2.7, because it depends on the [siris-scraper](https://pypi.org/project/siris-scraper/) package, which we ourselves developed earlier to scrape data from Skolverket.

### `download_skolenhetsregistret.py`
First install the required packages with `pip3 install -r requirements/python3.txt`.

Specify an S3 bucket in `settings.py`, and run `./download_skolenhetsregistret.py`.

### `download_artisan.py`
First install the required packages with `pip3 install -r requirements/python3.txt`.

This scraper uses Selenium, and requires a [browser driver](https://selenium-python.readthedocs.io/installation.html#drivers) to be installed. On Ubuntu you can install Gecko with `sudo apt-get install firefox-geckodriver`.

Specify an S3 bucket in `settings.py`.

Then run `./download_artisan.py`.
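If the driver is installed correctly, a smoke test along these lines should open the Jämförelsetal start page (a minimal sketch, not part of the repo):

```python
from selenium import webdriver

# Open the search page the scraper targets, print its title, then quit
driver = webdriver.Firefox()
driver.get("http://www.jmftal.artisan.se/databas.aspx?presel#tab-0")
print(driver.title)
driver.quit()
```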
### `download_surveys.py`
First install the required packages with `pip3 install -r requirements/python3.txt`.

Specify an S3 bucket in `settings.py`.

Then run `./download_surveys.py`.

## Who is behind the collection?

We are data journalists at [J++ Stockholm](https://jplusplus.org/sv/) and [Newsworthy](https://www.newsworthy.se/sv).

## Other projects

Many journalists, researchers and others are downloading data from Skolverket right now. Hopefully there will be a complete collection out there at the turn of the month, even if any single one of us has missed something. Here are links to other projects we know of:

- **Tankesmedjan Balans**: compiled statistics for selected school authorities, schools and municipalities (no raw data, but easy to read for those who don't program)
- **Staffan Betnér, statistician**: a zip archive of Excel, CSV and XML files in a clear folder structure
--------------------------------------------------------------------------------
/download_artisan.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
"""Fetch all data from http://www.jmftal.artisan.se/

The search interface uses session cookies that have proven tricky to emulate,
so this scraper is browser based, using Selenium.
"""

from selenium import webdriver
# from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.common.by import By
from urllib.parse import quote
from time import sleep
from csv import DictWriter, DictReader
from hashlib import md5
from bs4 import BeautifulSoup

import boto3
import os
import pathlib

from settings import S3_BUCKET


COMPARISONS = [
    "30001", "30002", "30003", "30004",
    "30005", "30006", "30007", "30008",
    "30009", "30010", "30011", "30012",
    "30013", "30014", "30015", "30016",
    "30017", "30018", "30019", "30020",
    "30021", "30017",
    "3",
    "42", "43", "44", "45", "46", "47", "48", "49",
    "31", "32", "33", "34", "35", "36", "37", "38", "39",
]

# Load the list of already fetched datasets, if any
downloaded = []
if os.path.exists("artisan.csv"):
    with open("artisan.csv", "r") as file_:
        reader = DictReader(file_)
        downloaded = [(r["school_type"], r["dataset"]) for r in reader]

session = boto3.Session(profile_name="jplusplus")  # replace with your profile
s3_client = session.client("s3")

"""
# Special profile for downloading.
# But the server seems to choke on large requests, so not doing that atm.
profile = webdriver.FirefoxProfile()
profile.set_preference("browser.download.folderList", 2)  # do not use default
profile.set_preference("browser.download.manager.showWhenStarting", False)
profile.set_preference("browser.download.dir", "./tmp")
mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
profile.set_preference("browser.helperApps.neverAsk.saveToDisk", mime)
"""

driver = webdriver.Firefox()
driver.get("http://www.jmftal.artisan.se/databas.aspx?presel#tab-0")

# Set "urval" (selection) to "Alla" (all), to get all datasets
id_ = "ctl00_ContentPlaceHolder1_ddlStatusYear"
select = Select(driver.find_element_by_id(id_))
select.select_by_visible_text("Alla")
sleep(2)

# Loop through school forms
id_ = "ctl00_ContentPlaceHolder1_ddlSkolformer"
select = Select(driver.find_element_by_id(id_))
options = [e.text for e in select.options]
for o in options:
    select.select_by_visible_text(o)
    sleep(3)

    # Select all municipalities ("whole nation" is already selected)
    xp = "//ul/li[@class='lbxAllSchoolArea nosearch']"
    checkbox = driver.find_element_by_xpath(xp) \
        .find_element_by_tag_name('input')
    checkbox.click()
    assert checkbox.is_selected()

    # Select all years
    years = driver.find_elements_by_xpath("//ul/li[@class='lbxYears']")
    for year in years[1:]:  # the first year is already selected
        checkbox = year.find_element_by_tag_name('input')
        checkbox.click()
        assert checkbox.is_selected()

    # Select the comparison groups listed in COMPARISONS
    xp = "//ul/li[@class='lbxCompareTo']/input"
    checkboxes = driver.find_elements_by_xpath(xp)
    for checkbox in checkboxes:
        val = checkbox.get_attribute("value")
        if val == "-99":
            # This one appears in another menu
            continue
        if val in COMPARISONS:
            checkbox.click()
            assert checkbox.is_selected()
        else:
            assert not checkbox.is_selected()

    # Loop through datasets ("variabler")
    datasets = driver.find_elements_by_xpath("//ul/li[@class='lbxVariables']")
    for dataset in datasets:
        name = dataset.text
        if (o, name) in downloaded:
            print(f"Skipping already downloaded {name} ({o})")
            continue

        print(f"Fetching {name} ({o})")
        data_checkbox = dataset.find_element_by_tag_name('input')
        # Select the dataset
        data_checkbox.click()
        assert data_checkbox.is_selected()

        id_ = "ctl00_ContentPlaceHolder1_btnCreateTable"
        btn = driver.find_element_by_id(id_)
        btn.click()
        # Wait for search results to load
        WebDriverWait(driver, 30).until(
            expected_conditions.visibility_of_element_located(
                (By.CSS_SELECTOR, ".resultTable")
            )
        )
        sleep(1)
        # There are two possible layouts that seem to be randomly(?) chosen.
        # If we are in table2 mode, flip to table1.
        cls_name = driver.find_element_by_class_name("resultTable") \
            .get_attribute("class")
        if "table2" in cls_name:
            id_ = "ctl00_ContentPlaceHolder1_btnFlipTable"
            btn = driver.find_element_by_id(id_)
            btn.click()
            WebDriverWait(driver, 30).until(
                expected_conditions.visibility_of_element_located(
                    (By.XPATH, "//table[@class='resultTable table1']")
                )
            )
            sleep(1)

        # Collect data
        data = []
        html = driver.page_source
        soup = BeautifulSoup(html, 'html.parser')
        regions = soup.find_all("table", {'class': "resultTable table1"})
        if o != "Specialskola":
            # Specialskola is not aggregated by municipality.
            # Expect at least all 290 municipalities + the nation,
            # and at most that plus the comparison groups.
            assert len(regions) >= 290 + 1
            assert len(regions) <= 290 + 1 + len(COMPARISONS)

        for r in regions:
            rows = r.find_all("tr")
            region_name = rows[0].find_all("th")[1].text
            years = [y.text for y in rows[1].find_all("th")]
            dataset_name = rows[2].find("th").text
            try:
                assert dataset_name == name
            except AssertionError:
                print("WARNING Mismatching dataset names:", dataset_name, name)
            values = [v.text for v in rows[2].find_all("td")]
            assert len(values) == len(years)
            item = {
                'region': region_name,
            }
            for y, v in zip(years, values):
                item[y] = v
            data.append(item)

        tmp_file = "./tmp/tmp.csv"
        with open(tmp_file, "w") as file_:
            writer = DictWriter(file_, fieldnames=data[0].keys())
            writer.writeheader()
            writer.writerows(data)

        s3_key_name = f"artisan/{o}/{name}.csv"
        s3_client.upload_file(
            tmp_file,
            S3_BUCKET,
            s3_key_name,
            ExtraArgs={'ACL': "public-read", 'CacheControl': "no-cache"},
        )

        s3_base = f"https://{S3_BUCKET}.s3.eu-north-1.amazonaws.com/"
        s3_path = s3_base + quote(s3_key_name)
        # Write the header once, when the index file is first created
        write_header = not os.path.exists("artisan.csv")
        with open("artisan.csv", "a") as file_:
            writer = DictWriter(file_, fieldnames=[
                "school_type",
                "dataset",
                "path",
                "size",
                "md5",
            ])
            if write_header:
                writer.writeheader()
            writer.writerow({
                'school_type': o,
                'dataset': name,
                'path': s3_path,
                'size': os.stat(tmp_file).st_size,
                'md5': md5(pathlib.Path(tmp_file).read_bytes()).hexdigest(),
            })

        # Return to the search tab
        id_ = "submenu1"
        btn = driver.find_element_by_id(id_) \
            .find_element_by_tag_name('a')
        btn.click()

        # De-select the dataset again
        data_checkbox.click()
        assert not data_checkbox.is_selected()

driver.close()
--------------------------------------------------------------------------------
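Each row in `artisan.csv` records the size and MD5 checksum of the uploaded file, so a mirror can be verified after the fact. A minimal sketch of such a check (assuming the S3 objects are still publicly readable):

```python
import requests
from csv import DictReader
from hashlib import md5

with open("artisan.csv") as file_:
    for row in DictReader(file_):
        content = requests.get(row["path"]).content
        # Compare the mirrored bytes against the recorded size and checksum
        assert len(content) == int(row["size"]), row["dataset"]
        assert md5(content).hexdigest() == row["md5"], row["dataset"]
```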