├── parametres.py
├── renommer_fichiers.py
├── generer_nocandidats.py
├── monmaster_scraping.py
└── README.md


/parametres.py:
--------------------------------------------------------------------------------
1 | username = "*******"  # MonMaster login identifier (replace the asterisks)
2 | password = "*******"  # MonMaster password; stored in clear text, so do not share this file
3 | 
4 | # folder_path must keep its trailing "/": the scripts append file names to it by concatenation
5 | folder_path = "./nom_du_dossier_cree/"
6 | code_formation = '*********'  # programme reference; used in the site URL, the .xlsx lookup and the PDF name prefix
7 | 


--------------------------------------------------------------------------------
/renommer_fichiers.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import glob
 3 | import pandas as pd
 4 | 
 5 | from parametres import code_formation
 6 | from parametres import folder_path
 7 | 
 8 | file = glob.glob(folder_path+"*"+code_formation+".xlsx")
 9 | print(file)
10 | if len(file) != 1:
11 |     raise ValueError("Fichiers .xlsx non trouvé ou multiple")
12 |     
13 | tot = pd.read_excel(file[0])
14 | 
15 | def rename_files(fpath, prefix, df):
16 |     for i, row in df.iterrows():
17 |         old_name = prefix + row['Numéro de candidat'] + '.pdf'
18 |         new_name = row['Nom de naissance'].replace(' ','') + '_' + row['Prénom'].replace(' ','') + '-' + row['Numéro de candidat'] + '.pdf'
19 |         try:
20 |             os.rename(fpath+old_name, fpath+new_name)
21 |         except FileNotFoundError:
22 |             print(f"Fichier {old_name} non trouvé")
23 | 
24 | 
25 | # Call the rename_files function with the prefix and dataframe as arguments
26 | rename_files(folder_path,code_formation+'-', tot)


--------------------------------------------------------------------------------
/generer_nocandidats.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import glob
 3 | import pandas as pd
 4 | 
 5 | from parametres import code_formation
 6 | from parametres import folder_path
 7 | 
 8 | file = glob.glob(folder_path+"*"+code_formation+".xlsx")
 9 | 
10 | print(file)
11 | if len(file) != 1:
12 |     raise ValueError("Fichiers .xlsx non trouvé ou multiple")
13 |     
14 | tot = pd.read_excel(file[0])
15 | 
16 | # Lister tous les fichiers
17 | files = os.listdir(folder_path)
18 | 
19 | # Seulement inclure fichiers avec préfixe
20 | files = [f for f in files if f.startswith(code_formation+'-')]
21 | 
22 | # Extraire les codes candidats des noms de fichiers
23 | codes = [f.replace(code_formation+'-', '').replace('.pdf', '') for f in files]
24 | 
25 | # Ranger dans un dataframe
26 | df = pd.DataFrame({'Code': codes})
27 | 
28 | print('\n Codes fichiers :')
29 | print(df)
30 | 
31 | # Trouver les manquants
32 | missing_codes = tot[~tot['Numéro de candidat'].isin(df['Code'])][['Numéro de candidat']].reset_index(drop=True)
33 | 
34 | 
35 | # Renommer la colonne "no"
36 | missing_codes = missing_codes.rename(columns={'Numéro de candidat': 'no'})
37 | 
38 | print('\n Codes manquants :')
39 | print(missing_codes)
40 | 
41 | missing_codes.to_csv('no_candidats.csv', index=False)


--------------------------------------------------------------------------------
/monmaster_scraping.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding: utf-8
 3 | 
 4 | # In[ ]:
 5 | 
 6 | from selenium import webdriver
 7 | from selenium.webdriver.common.by import By
 8 | from selenium.webdriver.common.keys import Keys
 9 | from selenium.webdriver.support.ui import WebDriverWait
10 | from selenium.webdriver.support import expected_conditions as EC
11 | import time
12 | import pandas as pd
13 | 
14 | 
15 | from parametres import code_formation
16 | from parametres import username
17 | from parametres import password
18 | 
19 | 
20 | cand = pd.read_csv("no_candidats.csv")  # candidate numbers to download (column "no"), as written by generer_nocandidats.py
21 | print(cand)
22 | 
23 | 
24 | # Safari is the driver used here; per the README, replace the next line with
25 | # the webdriver of your own platform/browser if you are not on a Mac.
26 | driver = webdriver.Safari()
27 | driver.get("https://interne.candidature.monmaster.gouv.fr/formationscandidatables/"+code_formation+"/candidatures")
28 | driver.implicitly_wait(10)  # element lookups keep retrying for up to 10 s
29 | time.sleep(4)  # fixed pause; presumably lets the login page finish loading
30 | 
31 | # In[ ]:
32 | 
33 | # Fill in the login form (fields with ids "username" and "password") and submit with Enter
34 | elem = driver.find_element(By.ID,"username")
35 | elem.clear()
36 | elem.send_keys(username)
37 | elem = driver.find_element(By.ID,"password")
38 | elem.clear()
39 | elem.send_keys(password)
40 | elem.send_keys(Keys.RETURN)
41 | 
42 | 
43 | # In[ ]:
44 | # Fixed pause; presumably lets the page shown after login load
45 | time.sleep(4)
46 | for index, row in cand.iterrows():
47 |     # One iteration per candidate: search by number, click the download link, go back
48 |     print("Telechargement candidat ",index," code ",row.no)
49 |     WebDriverWait(driver, timeout=20).until(EC.presence_of_element_located(
50 |         (By.CSS_SELECTOR,"[id^=page-candidatures-noCandidat-]")))
51 |     time.sleep(1)
52 |     elem = driver.find_element(By.CSS_SELECTOR,"[id^=page-candidatures-noCandidat-]")
53 |     elem.clear()
54 |     elem.send_keys(row.no)
55 |     elem.send_keys(Keys.RETURN)
56 |     # Wait (up to 20 s) until the download link is clickable, then click it
57 |     WebDriverWait(driver, timeout=20).until(EC.element_to_be_clickable(
58 |         (By.XPATH,"//a[contains(text(), 'Télécharger la candidature')]")))
59 |     time.sleep(1)
60 |     elem = driver.find_element(By.XPATH,"//a[contains(text(), 'Télécharger la candidature')]")
61 |     elem.click()
62 |     #driver.execute_script('arguments[0].click()', elem)
63 |     # Fixed pause; presumably gives the browser time to finish the download
64 |     time.sleep(8)
65 |     # Return to the previous page before handling the next candidate
66 |     driver.back()
67 | 
68 | 
69 | # NOTE(review): one extra step back after the loop; the browser session is never closed (no driver.quit())
70 | driver.back()
71 | 
72 | 
73 | 
74 | 
75 | 
76 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # monmaster_scraping
 2 | 
 3 | Simple outil pour télécharger automatiquement les dossiers complets monmaster.
 4 | 
 5 | **Mode d'emploi :**
 6 | - Placer les quatre fichiers .py dans un dossier
 7 | - Si vous n'êtes pas sur Mac, dans le fichier `monmaster_scraping.py` remplacer la ligne `driver = webdriver.Safari()` selon votre plateforme/navigateur internet 
 8 | https://www.selenium.dev/documentation/webdriver/getting_started/install_drivers/
 9 | 
10 | - Créer un sous-dossier pour une formation
11 | - Placer dans ce sous-dossier le fichier .xlsx obtenu en téléchargeant "Fichier des candidatures confirmées" dans l'espace monmaster de votre formation
12 | 
13 | ![Capture d’écran 2023-04-26 à 10 37 26](https://user-images.githubusercontent.com/386604/234519453-a9e93deb-6749-436a-bf8c-e8ec64fe6742.png)
14 | 
15 | - Dans le fichier `parametres.py`, remplacer les ****** par, dans l'ordre :
16 |   - votre identifiant
17 |   - votre mot de passe
18 |   - Le chemin et nom du sous-dossier créé plus tôt
19 |   - La référence de votre formation : on la trouve soit dans la colonne "référence" dans la liste des formations candidatables, soit dans l'onglet "Formation candidatable" une fois dans votre formation (sous le cadre rouge de censure dans cette capture)
20 |   
21 |   ![Capture d’écran 2023-04-26 à 10 37 16](https://user-images.githubusercontent.com/386604/234519377-5ec5ff02-0982-4bc5-900f-cf39197927c2.png)
22 | 
23 | - Générer la liste des candidats à télécharger en lançant le script : `python generer_nocandidats.py`. Cela doit créer un fichier `no_candidats.csv` dans le dossier des scripts.
24 | 
25 | - Lancer le script de téléchargement : `python monmaster_scraping.py`
26 | - Aller se faire un (long) café 
27 | - Déplacer tous les fichiers téléchargés dans le sous-dossier de la formation créé plus tôt
28 | - Relancer le script `python generer_nocandidats.py` pour vérifier s'il y a des fichiers manquants (si oui, relancer le script de téléchargement, etc.)
29 | - Si vous souhaitez renommer les fichiers au format Nom_Prenom-code.pdf, lancez le script `python renommer_fichiers.py`
30 | 
31 | **En cas d'erreur**
32 | 
33 | Si vous rencontrez une erreur "not clickable", commentez avec # la ligne 61 `elem.click()` du script `monmaster_scraping.py` et décommentez la ligne 62 `driver.execute_script('arguments[0].click()', elem)`
34 | 


--------------------------------------------------------------------------------