├── .gitignore ├── README.md ├── data ├── labeled │ ├── barbera_automated_2021 │ │ └── README.md │ ├── benoit_crowdsourced_2016 │ │ └── README.md │ ├── bestvater_sentiment_2023 │ │ └── README.md │ ├── bonikowski_politics_2022 │ │ └── README.md │ ├── dai_when_2022 │ │ └── README.md │ ├── fornaciari_we_2021 │ │ ├── .gitignore │ │ ├── README.md │ │ ├── annotation_set_01.csv │ │ ├── annotation_set_02.csv │ │ ├── annotation_set_03.csv │ │ └── annotations │ │ │ ├── classification │ │ │ ├── group1 │ │ │ │ ├── buttler.dominik.csv │ │ │ │ ├── jonas.bruder.csv │ │ │ │ └── samuel.droessler.csv │ │ │ ├── group2 │ │ │ │ ├── antonia.granser.csv │ │ │ │ ├── l.j.herbig.csv │ │ │ │ └── tabea.krauter.csv │ │ │ ├── group3 │ │ │ │ ├── johanneskuhling.csv │ │ │ │ ├── lopatina.csv │ │ │ │ └── luisa.kutlar.csv │ │ │ └── llms │ │ │ │ ├── DeepSeek-V3-0324.csv │ │ │ │ ├── Llama-4-Maverick-17B-128E-Instruct.csv │ │ │ │ ├── Qwen3-235B-A22B-Instruct-2507.csv │ │ │ │ └── gpt-oss-120b.csv │ │ │ └── extraction │ │ │ ├── group1 │ │ │ ├── carolin.rinner.jsonl │ │ │ ├── ingmar.rapp.jsonl │ │ │ ├── martin.jsonl │ │ │ └── quiring.paul.jsonl │ │ │ ├── group2 │ │ │ ├── augustijn.vanrode.jsonl │ │ │ ├── colin.negenborn.jsonl │ │ │ ├── dino.wildi.jsonl │ │ │ └── i.zeberioaguerrevere.jsonl │ │ │ └── llms │ │ │ ├── DeepSeek-V3-0324.jsonl │ │ │ ├── Llama-4-Maverick-17B-128E-Instruct.jsonl │ │ │ ├── Qwen3-235B-A22B-Instruct-2507.jsonl │ │ │ └── gpt-oss-120b.jsonl │ ├── gilardi_chatgpt_2023 │ │ └── README.md │ ├── miller_active_2020 │ │ └── README.md │ ├── petkevic_political_2022 │ │ └── README.md │ ├── sylvester_parlee_2022 │ │ └── README.md │ └── theocharis_dynamics_2020 │ │ └── README.md └── misc │ └── bank_sentences_with_senses.csv ├── notebooks ├── .assets │ ├── task_types-multilabel_classification.svg │ ├── task_types-pairwise_comparison.svg │ ├── task_types-sentence_classification.svg │ └── task_types-token_classification.svg ├── .gitignore ├── README.md ├── annotation │ ├── .gitignore │ ├── 
annotation_aggregation_pledge_classification.ipynb │ ├── annotation_aggregation_pledge_extraction.ipynb │ ├── compute_ica_pledge_classification.ipynb │ ├── compute_ica_pledge_classification.py │ ├── compute_ica_pledge_extraction.ipynb │ └── compute_ica_pledge_extraction.py ├── embedding │ ├── contextualized_embedding_transformers_explained.ipynb │ ├── sentence_embedding_basics.ipynb │ └── word_embedding_basics.ipynb ├── encoder_finetuning │ ├── classification_evaluation_metrics_explained.ipynb │ ├── finetune_sequence_classifier.ipynb │ ├── finetune_sequence_classifier_multilabel.ipynb │ ├── finetune_sequence_classifier_multilabel_setfit.ipynb │ ├── finetune_sequence_scorer_pairwise.ipynb │ ├── finetune_token_classifier.ipynb │ └── finetuning_logic_illustrated.ipynb ├── incontext_learning │ ├── llm_fewshot_classification.ipynb │ ├── llm_icl_pledge_extraction.ipynb │ ├── llm_openai_chat_intro.ipynb │ ├── llm_text_generation_explained.ipynb │ ├── llm_tokenization_and_costs.ipynb │ ├── llm_zeroshot_classification.ipynb │ └── llm_zeroshot_classification_structured.ipynb └── topic_modeling │ └── topicmodel_bertopic.ipynb ├── setup ├── .gitignore ├── README.md ├── imgs │ ├── openai_billing_configure_payment.png │ ├── openai_billing_overview.png │ ├── openai_key_create_new.png │ ├── openai_project_create_new.png │ └── vscode_python_environments_popup.png ├── requirements.txt ├── setup_github_clone.md ├── setup_google_colab.md ├── setup_macos.md ├── setup_ollama.md ├── setup_openai.md ├── setup_python_with_anaconda.md ├── setup_vs_code.md ├── test.ipynb ├── test_ollama.ipynb └── test_openai_key.ipynb └── src ├── __init__.py ├── annotation ├── agreement.py └── dawidskene.py ├── finetuning.py ├── metrics.py ├── setfit_utils.py ├── topic_modeling.py └── utils ├── io.py └── token_counters.py /.gitignore: -------------------------------------------------------------------------------- 1 | # files and folders 2 | .crossnote/ 3 | **/*.DS_Store 4 | *.DS_Store 5 | 6 | todos.md 7 | 8 
| data/ 9 | data/corpora/* 10 | models/* 11 | 12 | outline.md 13 | _backup/ 14 | readings/ 15 | 16 | results/*/checkpoint-*/ 17 | 18 | slides/ 19 | 20 | # Byte-compiled / optimized / DLL files 21 | __pycache__/ 22 | *.py[cod] 23 | *$py.class 24 | 25 | # C extensions 26 | *.so 27 | 28 | # Distribution / packaging 29 | .Python 30 | build/ 31 | develop-eggs/ 32 | dist/ 33 | downloads/ 34 | eggs/ 35 | .eggs/ 36 | lib/ 37 | lib64/ 38 | parts/ 39 | sdist/ 40 | var/ 41 | wheels/ 42 | share/python-wheels/ 43 | *.egg-info/ 44 | .installed.cfg 45 | *.egg 46 | MANIFEST 47 | 48 | # PyInstaller 49 | # Usually these files are written by a python script from a template 50 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 51 | *.manifest 52 | *.spec 53 | 54 | # Installer logs 55 | pip-log.txt 56 | pip-delete-this-directory.txt 57 | 58 | # Unit test / coverage reports 59 | htmlcov/ 60 | .tox/ 61 | .nox/ 62 | .coverage 63 | .coverage.* 64 | .cache 65 | nosetests.xml 66 | coverage.xml 67 | *.cover 68 | *.py,cover 69 | .hypothesis/ 70 | .pytest_cache/ 71 | cover/ 72 | 73 | # Translations 74 | *.mo 75 | *.pot 76 | 77 | # Django stuff: 78 | *.log 79 | local_settings.py 80 | db.sqlite3 81 | db.sqlite3-journal 82 | 83 | # Flask stuff: 84 | instance/ 85 | .webassets-cache 86 | 87 | # Scrapy stuff: 88 | .scrapy 89 | 90 | # Sphinx documentation 91 | docs/_build/ 92 | 93 | # PyBuilder 94 | .pybuilder/ 95 | target/ 96 | 97 | # Jupyter Notebook 98 | .ipynb_checkpoints 99 | 100 | # IPython 101 | profile_default/ 102 | ipython_config.py 103 | 104 | # pyenv 105 | # For a library or package, you might want to ignore these files since the code is 106 | # intended to run in multiple environments; otherwise, check them in: 107 | # .python-version 108 | 109 | # pipenv 110 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
111 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 112 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 113 | # install all needed dependencies. 114 | #Pipfile.lock 115 | 116 | # poetry 117 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 118 | # This is especially recommended for binary packages to ensure reproducibility, and is more 119 | # commonly ignored for libraries. 120 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 121 | #poetry.lock 122 | 123 | # pdm 124 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 125 | #pdm.lock 126 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 127 | # in version control. 128 | # https://pdm.fming.dev/#use-with-ide 129 | .pdm.toml 130 | 131 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 132 | __pypackages__/ 133 | 134 | # Celery stuff 135 | celerybeat-schedule 136 | celerybeat.pid 137 | 138 | # SageMath parsed files 139 | *.sage.py 140 | 141 | # Environments 142 | .env 143 | .venv 144 | env/ 145 | venv/ 146 | ENV/ 147 | env.bak/ 148 | venv.bak/ 149 | 150 | # Spyder project settings 151 | .spyderproject 152 | .spyproject 153 | 154 | # Rope project settings 155 | .ropeproject 156 | 157 | # mkdocs documentation 158 | /site 159 | 160 | # mypy 161 | .mypy_cache/ 162 | .dmypy.json 163 | dmypy.json 164 | 165 | # Pyre type checker 166 | .pyre/ 167 | 168 | # pytype static type analyzer 169 | .pytype/ 170 | 171 | # Cython debug symbols 172 | cython_debug/ 173 | 174 | # PyCharm 175 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 176 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 177 | # and can be added to the global gitignore or 
merged into this file. For a more nuclear 178 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 179 | #.idea/ 180 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Course Materials for "From Embeddings to LLMs: Advanced Text Analysis with Python" 2 | 3 | This repository contains the materials for the summer school course "From Embeddings to LLMs: Advanced Text Analysis with Python" taught by Hauke Licht (hauke.licht@uibk.ac.at). 4 | 5 | ## Setup — Required before the first course session! 6 | 7 | Please set up your computer **before the course starts** by following the instructions linked below! 8 | There will be *no* time for this on the first day of the course. 9 | 10 | Please refer to the markdown files in the [setup](./setup/) folder for detailed instructions: 11 | 12 | 1. Set up Python and conda on the laptop you will use during the course: [setup_python_with_anaconda.md](./setup/setup_python_with_anaconda.md) (also see [setup_macos.md](./setup/setup_macos.md) if you are a Mac user) 13 | 2. Clone the GitHub repository: [setup_github_clone.md](./setup/setup_github_clone.md) 14 | 3. software setup 15 | 1. Install *VS Code*: [setup_vs_code.md](./setup/setup_vs_code.md) 16 | 2. Download and install `ollama`: [setup_ollama.md](./setup/setup_ollama.md) 17 | 4. create external accounts 18 | 1. Create a Google Colab account (optionally, upgrade to _Pro_): [setup_google_colab.md](./setup/setup_google_colab.md) 19 | 2. 
optional: Create an OpenAI account and API key (optionally, upgrade to _Pro_): [setup_openai.md](./setup/setup_openai.md) 20 | -------------------------------------------------------------------------------- /data/labeled/barbera_automated_2021/README.md: -------------------------------------------------------------------------------- 1 | # News articles coded for economic news topic and sentiment from Barberá et al. (2021) 2 | 3 | author: Hauke Licht & Naomi Yagai\ 4 | date: 2024-01-12 5 | 6 | ## Description 7 | 8 | In their 2021 *Political Analysis* paper "Automated Text Classification of News Articles: A Practical Guide," Barberá and colleagues analyze the tone of coverage of the US national economy in the New York Times. To this end, they identify articles about the economy and their tone (positive--negative). 9 | 10 | Their measurements are generated through supervised text classification based on trained as well as crowd coders' annotations of news articles' texts. 11 | 12 | ## Annotation procedure 13 | 14 | The authors have distributed multiple samples for annotation by trained coders and/or crowd coders. Here, we focus on dataset **5AC**, which records article-segment-level codings (i.e., of the first five sentences of an article) by 3-10 crowd coders. 
Each article segment was coded by each coder along two coding dimensions: 15 | 16 | - *relevance:* 'yes' if the article gives the coder an indication of how the economy is performing, 'not sure' if the coder could not determine this, and 'no' otherwise 17 | - *positivity:* a score ranging from 1 (very negative) to 9 (very positive) that coders assign to relevant news articles (i.e., those for which *relevance* == 'yes') 18 | 19 | ## The data 20 | 21 | source: replication data on Political Analysis' Harvard Dataverse: 22 | 23 | ## Download data files 24 | 25 | | dataset_key | file | url | 26 | |:-----------------------|:-----------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------| 27 | | barbera_automated_2021 | barbera_automated_2021-econ_news_sentiment.tsv | https://cta-text-datasets.s3.eu-central-1.amazonaws.com/labeled/barbera_automated_2021/barbera_automated_2021-econ_news_sentiment.tsv | -------------------------------------------------------------------------------- /data/labeled/dai_when_2022/README.md: -------------------------------------------------------------------------------- 1 | # Identifying populist rhetoric in U.S. presidential campaign speeches from Dai & Kustov (2022) 2 | 3 | author: Hauke Licht & Naomi Yagai\ 4 | date: 2024-01-12 5 | 6 | ## Description 7 | 8 | In their 2022 *Political Communication* paper "When Do Politicians Use Populist Rhetoric? Populism as a Campaign Gamble," Dai and Kustov assess the prevalence of populist rhetoric across U.S. presidential campaign speeches (1952--2016). 9 | 10 | Their measurements are generated through a novel automated text analysis method utilizing active learning and word embeddings. 11 | 12 | ## The raw data 13 | 14 | The authors built a comprehensive U.S. presidential campaign corpus consisting of 4,314 speeches from 1952 to 2016. 
15 | 16 | The authors describe the corpus in the "Data" section of their article (p. 388) as follows. 17 | 18 | > The speeches are collected from two data sources: The Annenberg/Pew Archive of Presidential Campaign Discourse (Annenberg, 2000) and The American Presidency Project (Woolley & Peters, 2008) hosted at the University of California, Santa Barbara. 19 | The Annenberg/Pew Archive of Presidential Campaign Discourse includes transcripts of campaign speeches delivered by the Democratic and Republican presidential nominees between September 1st and the election day, as well as their nomination acceptance speeches. 20 | Overall, it covers 12 elections and 21 presidential campaigns from 1952 to 1996 with 2,406 speeches... 21 | We use the American Presidency Project to expand on the Annenberg/Pew Archive data by adding five most recent elections from 2000 to 2016 and incorporating all speeches delivered during presidential campaigns. 22 | The average speech length is 2,167 words, and 90% of the speeches are between 500 words to 5000 words. 23 | 24 | Regarding their analysis, the authors also mention that 25 | > While our speech data and populism measurement include speeches and populism score by all candidates and span from the day of candidacy announcement to the election day, we only include the speeches delivered by the final candidates from the two parties from January to the election day to test our theoretical model, which results in 3,436 speeches. 26 | 27 | The corpus is in English. 28 | 29 | ## Annotation procedure 30 | 31 | Speeches are divided into sub-speeches of 10 paragraphs. 32 | Each sub-speech is treated as a document in the classification task. 33 | 34 | The annotation procedure is described in their article section "Measurement of Populist Rhetoric" (pp. 388-393). 35 | 36 | The authors randomly sampled 73 of the 4,314 speeches, which contain 407 sub-speeches. 
37 | The sample was stratified to include at least two speeches from every decade to account for possible variations of populist language over time. 38 | The sample was then coded by the first human coder. 39 | The second coder coded a smaller set of 69 sub-speeches from the sample to evaluate inter-coder reliability. 40 | Discrepancies were resolved by majority rule with a third coder. 41 | After training the classifier and applying it to the entire corpus, the authors used active learning to identify the most informative documents (in this case, documents whose labels the classifier was most uncertain about). 42 | These queried documents were then coded by the human coder and added to the training set to re-train the classifier and query new documents to code. 43 | This additional coding process was repeated 9 times until they labelled an additional 180 sub-speeches. 44 | 45 | Each sub-speech was coded by each coder as either populist or not, following the definition below, given in the article (p. 389). 46 | 47 | - *Pop:* accept if considered populist, reject if not considered populist. 48 | 49 | > A text is considered populist if and only if it (1) recognizes the people instead of the elite as the only legitimate source of power (people-centric); 50 | (2) creates separation between us and them (anti-pluralist); 51 | and, in doing so, (3) stipulates the separation of us and them on moral grounds (good versus evil; Dai, 2019; Hawkins, 2009) 52 | 53 | ## The data 54 | 55 | source: obtained by emailing the authors. 
56 | 57 | ### Descriptives 58 | 59 | Number of documents: 587 60 | 61 | Number of annotations per document: 1 to 3 62 | 63 | *more to be added* 64 | 65 | ### Problems 66 | 67 | *none known* 68 | 69 | ### Cleaned data 70 | 71 | Corresponding to the analysis, we provide the following CSV file: 72 | 73 | - populist rhetoric classification: 74 | "dai_when_2022-campaignspeech_populism.csv" 75 | - column 'label' indicates the annotation: "accept" when considered populist, "reject" when not considered populist 76 | - column 'text' records the coded sub-speeches 77 | - column 'metadata__candidate' indicates the name of the candidate who gave the sub-speech 78 | - column 'metadata__title' contains a brief description of the sub-speech (the type of campaign speech, e.g., remarks or interview, and where it was given) 79 | - column 'metadata__year' indicates the year the sub-speech was given 80 | - column 'metadata__test' indicates [[needs to be clarified]] 81 | - column 'metadata__row_number' indicates the original row number of each sub-speech before cleaning the data 82 | 83 | Number of unique documents/texts: 84 | 85 | Label distribution: ... 
86 | 87 | ## Download data files 88 | 89 | | dataset_key | file | url | 90 | |:--------------|:------------------------------------------|:------------------------------------------------------------------------------------------------------------------------| 91 | | dai_when_2022 | dai_when_2022-campaignspeech_populism.csv | https://cta-text-datasets.s3.eu-central-1.amazonaws.com/labeled/dai_when_2022/dai_when_2022-campaignspeech_populism.csv | -------------------------------------------------------------------------------- /data/labeled/fornaciari_we_2021/.gitignore: -------------------------------------------------------------------------------- 1 | !annotations/ 2 | annotation_set_*.csv 3 | annotation_set_*.jsonl 4 | raw.xlsx 5 | 6 | !annotation_set_01.csv 7 | !annotation_set_02.csv 8 | !annotation_set_03.csv 9 | 10 | -------------------------------------------------------------------------------- /data/labeled/fornaciari_we_2021/README.md: -------------------------------------------------------------------------------- 1 | # Sentence-level policy pledge annotations of English and Swedish manifesto sentences 2 | 3 | Paper: Fornaciari et al. ([2021](https://doi.org/10.18653/v1/2021.findings-acl.301)) 4 | Data: [GitHub](https://github.com/fornaciari/MiMac_taxes/raw/refs/heads/main/jupyter_xsl_preproc_210130170501/all210126.xlsx) 5 | 6 | ## Definition of pledge 7 | 8 | Fornaciari et al. point to Thomson et al.'s ([2017](https://doi.org/10.1111/ajps.12313), 532; emphases added) definition: 9 | 10 | > For a statement to qualify as a pledge, it must contain **language indicating commitment** to some future action or outcome. 11 | > Pledges include both firm commitment language, such as “we will” or “we promise to,” as well as more softly described intention, such as “we support” or “we favor,” as long as parties indicate that they support the action or outcome referred to unequivocally. 
12 | > What determines whether a statement qualifies as a pledge is the **testability of the action or outcome** to which the party is committing itself. 13 | > **A pledge is a statement committing a party to an action or outcome that is testable**: 14 | > That is, we can gather evidence and make an argument that the action or outcome was either accomplished or not. 15 | > Many statements that begin with hard commitment language would be considered rhetoric, not pledges, because they do not meet the testability criteria — for example, “we will ensure that our government shows respect for families” or “we support fair treatment for all.” 16 | > We define a pledge as **a statement committing a party to one specific action or outcome that can be clearly determined to have occurred or not** [*post hoc*]. 17 | 18 | However, Thomson et al. discuss the pros and cons of this narrow definition (see the [Supporting Information](https://onlinelibrary.wiley.com/action/downloadSupplement?doi=10.1111%2Fajps.12313&file=ajps12313-sup-0001-SuppMat.pdf) for their paper). 
19 | 20 | 21 | ## Download data files 22 | 23 | | dataset_key | file | url | 24 | |:-------------------|:--------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------| 25 | | fornaciari_we_2021 | fornaciari_we_2021-pledge_binary.tsv | https://cta-text-datasets.s3.eu-central-1.amazonaws.com/labeled/fornaciari_we_2021/fornaciari_we_2021-pledge_binary.tsv | 26 | | fornaciari_we_2021 | fornaciari_we_2021-pledge_binary_sample.tsv | https://cta-text-datasets.s3.eu-central-1.amazonaws.com/labeled/fornaciari_we_2021/fornaciari_we_2021-pledge_binary_sample.tsv | 27 | | fornaciari_we_2021 | annotation_set_05.csv | https://cta-text-datasets.s3.eu-central-1.amazonaws.com/labeled/fornaciari_we_2021/annotation_set_05.csv | 28 | | fornaciari_we_2021 | annotation_set_04.csv | https://cta-text-datasets.s3.eu-central-1.amazonaws.com/labeled/fornaciari_we_2021/annotation_set_04.csv | 29 | | fornaciari_we_2021 | annotation_set_03.csv | https://cta-text-datasets.s3.eu-central-1.amazonaws.com/labeled/fornaciari_we_2021/annotation_set_03.csv | 30 | | fornaciari_we_2021 | annotation_set_02.csv | https://cta-text-datasets.s3.eu-central-1.amazonaws.com/labeled/fornaciari_we_2021/annotation_set_02.csv | 31 | | fornaciari_we_2021 | annotation_set_01.csv | https://cta-text-datasets.s3.eu-central-1.amazonaws.com/labeled/fornaciari_we_2021/annotation_set_01.csv | -------------------------------------------------------------------------------- /data/labeled/fornaciari_we_2021/annotation_set_01.csv: -------------------------------------------------------------------------------- 1 | text_id,text,label,metadata__party,metadata__year,split_ 2 | 1030,"Our sports policy will aim to inculcate in our citizens, especially students and youth, the culture of sports and fitness and will encourage sports as a career .",0,INC,2019,0 3 | 406,Congress promises to work with industry to increase the 
expenditure on science and technology to 2 per cent of GDP .,1,INC,2019,0 4 | 574,We will recognise the 11 left out Indian Gorkha sub - tribes as Schedule Tribes .,1,BJP,2019,0 5 | 502,"A national programme will be launched, in cooperation with State Governments, to provide bicycles to girls from Below Poverty Line Families who attend school .",1,BJP,2009,0 6 | 228,Encouraging the production of cereals and discouraging the conversion of fertile farm land for dubious industrial projects .,0,BJP,2009,0 7 | 773,The number of courts and the number of judges will be doubled in five years for quicker judicial process .,1,BJP,2004,0 8 | 323,The Ministry of Finance will be directed to provide all required resources to implement this program in a time - bound manner .,1,BJP,2004,0 9 | 185,We will restrict foreign equity holding in private television broadcasting to 20% (and prevent cross holding to avoid emergence of monopolies in the media) .,1,BJP,1999,0 10 | 197,"India’s indigenous thorium technology programme will be expedited and given all financial assistance, correcting the grievous wrong done by the UPA Government .",1,BJP,2009,0 11 | 212,Small industry will be particularly encouraged in states and regions where the potential for large or heavy industry is limited .,0,INC,1999,0 12 | 681,This will promote competition and enhance efficiency in procurement processes .,0,INC,2014,0 13 | 799,The Planning Commission will be reformed and reorganized in light of the changing developmental needs of our country .,0,BJP,2004,0 14 | 218,"The Total Sanitation Campaign, launched by the NDA Government in 1999, has been a remarkable success .",1,BJP,2004,0 15 | 148,"It will enable industry to access agriculture produce directly from farmers, and suggest ways to step up exports, including to organized retail outlets abroad .",0,BJP,2004,0 16 | 331,The emphasis in all foreign investment policies will be maximization of local value - addition and export potential .,0,INC,2009,0 17 | 
92,Involve the state Governments in the promotion of foreign trade and commerce .,0,BJP,2014,0 18 | 207,"Immediately after forming the governments in Chhattisgarh, Madhya Pradesh and Rajasthan, as promised, the 3 Congress Governments waived the loans of farmers .",1,INC,2019,0 19 | 335,"We will introduce the goods and services tax from April 1, 2010 .",1,INC,2009,0 20 | 336,"Every consumer of electricity in India, including farmers, would be connected through digital, tamper - proof meters in the next three years .",1,BJP,2004,0 21 | 490,"Highest priority would be given to address the acute shortage of teachers and researchers, quality of education and research, and also the employability factor associated with most of the courses .",0,BJP,2014,0 22 | 288,The Indian National Congress has endeavoured to provide quality public health services to all citizens .,0,INC,2014,0 23 | 62,"They are, in fact, responsible for the electoral growth of the BJP .",0,INC,2009,0 24 | 370,A raw material use policy will be unveiled in the mines sector .,0,BJP,2004,0 25 | 181,More specialist battalions will be raised and positioned in key locations across the country .,1,INC,2009,0 26 | 718,The Congress will identify those environmental management functions that could be delegated to the states and local bodies .,1,INC,1999,0 27 | 279,"The India of tomorrow will have 125 crore such dreams, and will be built on the same .",0,BJP,2014,0 28 | 469,These institutions would provide four - year integrated courses that would set the standards for quality teachers in our schools .,1,BJP,2019,0 29 | 398,A detailed roadmap for accomplishing this will be unveiled within 30 days of coming to power so that a national consensus is also created .,1,INC,2004,0 30 | 189,"Along with vastly expanded credit facilities for self - employment, the services industry will be given all support to fulfill its true employment potential .",1,INC,2004,0 31 | 515,"Art, culture and heritage constitute the identity of 
a people .",0,INC,2019,0 32 | 587,"Make potable drinking water available to all thus reducing water - borne diseases, which will automatically translate into Diarrhoea - free India .",1,BJP,2014,0 33 | 9,"Faced by this aggression in Kargil, the response of the Government was swift, though measured .",0,BJP,1999,0 34 | 227,Targeting time spent for tax compliance at 1 hour per month .,1,BJP,2019,0 35 | 470,We will also motivate States to replicate this model as it is our firm belief that quality teachers lay the foundation of quality learning .,0,BJP,2019,0 36 | 140,We will extensively use technology to ensure a better knowledge of the market prices of various agro - products for the benefit of farmers .,0,BJP,2019,0 37 | 17,This is the moment to consolidate all forces subscribing to the fundamental values of our Constitution .,0,INC,2004,0 38 | 287,Antyodaya cards for all households at risk of hunger will be introduced .,1,INC,2004,0 39 | 687,"In consonance with its policy, the BJP supports the creation of Telangana as a separate State of the Union of India .",1,BJP,2009,0 40 | 682,Article 370 poses a psychological barrier for the full integration of the people of Jammu & Kashmir with the national mainstream .,0,BJP,2009,0 41 | 604,Reservations for the poor among ‘Forward Classes’ will be introduced after receiving recommendations of the Commission set up for this purpose .,1,BJP,2004,0 42 | 58,"4 lakh crores in lost output, lakhs of jobs and greater indebtedness .",0,INC,2019,0 43 | 168,11 . We will strengthen the legal and institutional framework to protect our children .,0,INC,2014,0 44 | 938,Joint projects in the energy sector will be actively explored .,0,INC,1999,0 45 | 160,a . We will ensure the passage of the Women’s Reservation Bill .,1,INC,2014,0 46 | 42,"The Rs . 
1,000 - crore Sampoorna Grameen Rozgar Yojana, started by our Government, is the biggest food for - work program since Independence .",0,BJP,2004,0 47 | 343,"While the bulk of our population still lives and works in villages, India is rapidly urbanizing .",0,INC,2009,0 48 | 423,"New middle - level technical institutes in clusters where, for example, weavers and artisans are concentrated, will be started .",1,INC,1999,0 49 | 788,"In the last 5 years under the BJP Government, hate crimes and atrocities against the minorities and other vulnerable sections of the people have increased manifold .",0,INC,2019,0 50 | 157,This shall be responsible for setting and enforcing standards for all food products .,1,BJP,2004,0 51 | 144,We will also introduce a multi - purpose identity card for all citizens .,1,BJP,1999,0 52 | -------------------------------------------------------------------------------- /data/labeled/fornaciari_we_2021/annotation_set_02.csv: -------------------------------------------------------------------------------- 1 | text_id,text,label,metadata__party,metadata__year,split_ 2 | 164,"Mahila Sashaktikaran, to provide for the political empowerment and full educational, economic and legal equality for women ;",0,INC,2004,1 3 | 761,We will : set up Gas Grids to make gas available to households and industry .,1,BJP,2014,1 4 | 156,For reaching this goal we will take effective steps to create a riot - free order and a terrorism - free India .,1,BJP,1999,1 5 | 276,We will take the Mission to a new level through sustainable Solid Waste Management in every village .,1,BJP,2019,1 6 | 241,All controls on the free movement of farm commodities and processing of agricultural products and all regulations that depress incomes of farmers will be systematically eliminated .,0,INC,2009,1 7 | 627,The Court of Appeal will sit in multiple Benches of 3 judges each in 6 locations .,1,INC,2019,1 8 | 147,4 . 
We will provide skills training to 10 crore youth and provide them with employment opportunities over the next 5 years .,1,INC,2014,1 9 | 566,The Indian National Congress is committed to ensuring these projects are completed in a time - bound manner .,1,INC,2014,1 10 | 682,All efforts will be made for the eradication of filariasis within five years .,1,BJP,2004,1 11 | 780,"On the other side is an ideology of hate, bigotry and prejudice, an ideology that negates the very idea of India that has been cherished by us for centuries .",0,INC,2014,1 12 | 228,A wider and more comprehensive spectrum of skills will be imparted .,0,INC,2009,1 13 | 50,"For this, the government shall introduce time - bound programmes of needed administrative reforms including those for the Police and other Civil Services .",0,BJP,1999,1 14 | 184,We will also enact a comprehensive Broadcasting Bill to regulate private broadcasting and to protect Indian interests .,1,BJP,1999,1 15 | 315,"An initial set of identified schemes will be implemented, with public participation, by 2015 .",0,BJP,2004,1 16 | 360,The Indian National Congress has declared the sacred Ganga as a “national river” .,0,INC,2009,1 17 | 1,A time to reestablish the core values that have made India unique .,0,INC,2004,1 18 | 255,National scholarships for boys and girls belonging to scheduled castes and scheduled tribes will be further increased .,1,INC,2009,1 19 | 442,"More funds will be allocated to sports, and we will encourage the State Governments to fully discharge their responsibility in the promotion of sports .",1,BJP,2014,1 20 | 220,"Like it did for panchayats, the Congress will bring forward a Constitutional amendment to ensure the democratic, autonomous and professional functioning of all cooperatives .",1,INC,2004,1 21 | 333,"After agriculture, the retail sector is the largest employer of nearly four crore people .",0,BJP,2009,1 22 | 425,"Key leverage technologies such as information technology, biotechnology and 
materials science and technology would be given special importance .",0,BJP,2009,1 23 | 996,"We will implement a solid waste management plan in every habitation, village, town and city employing modern technology and machinery .",0,INC,2019,1 24 | 57,It means that we will facilitate the domestic industry to gain enough muscles to compete with the multinationals in the local and global markets .,0,BJP,1999,1 25 | 348,"A new national movement for sanitation and hygiene, along the lines launched by Gandhijl during the Freedom movement will now be started and spearheaded by the Congress .",1,INC,1999,1 26 | 219,It also pledges a major programme for training of teachers and improving the physical environment in schools .,1,INC,2009,1 27 | 203,We appeal for the cooperation of all parties and all sections of society in this great endeavour .,0,BJP,1999,1 28 | 290,"New international airport projects for Delhi, Mumbai, Bangalore and Hyderabad will be completed within the next three to four years and the Prime Minister will inaugurate them before August 15, 2008 .",1,BJP,2004,1 29 | 146,"We will enact the Lok Pal Bill with adequate powers to deal with corruption charges against anyone, including the Prime Minister .",1,BJP,1999,1 30 | 60,Our target is to achieve at least $10 billion per year which will commensurate with our growth objectives .,1,BJP,1999,1 31 | 112,The rate of both public and private investment has declined over the period that the BJP - led NDA government has been in office .,0,INC,2004,1 32 | 145,We will undertake all necessary legislative and administrative measures to ensure the right of franchise of the Armed Forces through proxy voting and or any other method .,1,BJP,1999,1 33 | 758,"However, they have been facing many problems with rapid urbanization and changes in the traditional joint families .",0,BJP,2004,1 34 | 697,We will amend the Service Rules to reserve for women 33 per cent of appointments to posts in the Central Government .,1,INC,2019,1 35 
| 433,"Space research, peaceful uses of nuclear energy, and civilian applications of defense research will be further promoted, building on India’s self - reliant strides in these areas .",0,BJP,2004,1 36 | 262,Congress reiterates its firm belief in a high rate of growth of GDP .,0,INC,2019,1 37 | 270,This must be reinforced by other programmes for the development and diversification of the rural economy .,0,INC,1999,1 38 | 583,"We will ensure a 'Swachh Bharat' by Gandhiji's 150th birth anniversary in 2019, taking it up in mission mode by converging resources and building around jan bhagidari :",1,BJP,2014,1 39 | 278,The Indian National Congress also proposes to reserve one - third of all central government jobs for women .,1,INC,2009,1 40 | 346,One of the main reasons for this is the absence of proper urban and rural sanitation and poor liquid and solid waste management .,0,INC,1999,1 41 | 275,"We have constructed over 9 crore toilets under our flagship programme, Swachh Bharat Mission .",1,BJP,2019,1 42 | 175,Fiscal incentives to promote employment - intensive growth will be introduced .,0,INC,2004,1 43 | 166,"We are committed to establishing a civilised, humane and just civil order ; that which does not discriminate on ground — of caste, religion, class, colour, race or sex .",0,BJP,1999,1 44 | 46,It is the Congress that began the process of extending social security to the 93 per cent work force that subsists In the unorganized sector .,0,INC,2004,1 45 | 252,Exempt income up to Rs 3 lakh from Income Tax .,1,BJP,2009,1 46 | 308,"We will further encourage integrated development of coastal areas including coastal cities, coastal transport and coastal industrialization .",0,BJP,2019,1 47 | 12,"The Indian National Congress is the only party that combines experience and youth, wisdom and exuberance, achievement and ambition .",0,INC,2009,1 48 | 119,"Congress promises an adequately capitalised Tourism Development Bank to provide low - cost, long - term funds for 
investment in tourism - related businesses .",1,INC,2019,1 49 | 417,The Indian National Congress also acknowledges the vital role remittances by overseas Indians play in bolstering the country’s finances .,0,INC,2009,1 50 | 59,"At the same time the country cannot do without FDI because besides capital stock it brings with it technology, new market practices and most importantly employment .",1,BJP,1999,1 51 | 244,"New godowns, storage facilities, cold storage nehvorks and access roads will get priority .",1,INC,1999,1 52 | -------------------------------------------------------------------------------- /data/labeled/fornaciari_we_2021/annotation_set_03.csv: -------------------------------------------------------------------------------- 1 | text_id,text,label,metadata__party,metadata__year,split_ 2 | 773,10 . Protecting Indians overseas from exploitation or threats will remain a paramount concern of the Indian National Congress .,0,INC,2014,2 3 | 534,An ‘Extremely Backward Communities Development Bank’ will be set up for promoting skill enhancement through learn - and - earn schemes for their uplift .,1,BJP,2009,2 4 | 313,"Similarly, almost all remaining households have been provided with an electricity connection .",1,BJP,2019,2 5 | 444,We will expand this initiative further to take the number of beneficiaries of Mudra loans up to 30 crore .,1,BJP,2019,2 6 | 19,"In this sacred endeavour, the Congress has joined hands with like - minded political parties in different states .",0,INC,2004,2 7 | 249,"The Indian National Congress has always stood for its democratic, autonomous and professional functioning and this will be ensured by enacting appropriate laws, including making a Constitutional provision .",1,INC,2009,2 8 | 101,We are committed to annulling Article 35A of the Constitution of India as the provision is discriminatory against non - permanent residents and women of Jammu and Kashmir .,1,BJP,2019,2 9 | 518,"A suitable law, enabling micro - credit operations 
and nurturing them to be scaled up, will be enacted .",1,BJP,2004,2 10 | 253,"Education at all stages — primary, secondary and university — will be free in all respects for boys and girls belonging to dalit and adivasi communities .",1,INC,2009,2 11 | 630,The Indian National Congress is committed to providing clean cooking fuel across the country in an accelerated manner in order to address this .,0,INC,2014,2 12 | 147,The party believes in the principle of unity in diversity .,0,BJP,2014,2 13 | 642,We will increase the strength of the diplomatic and allied cadres to keep pace with our increasing global engagement and enhanced stature of India in the world .,0,BJP,2019,2 14 | 616,"provide tax incentives for investments in research and development, geared towards indigenization of technology and innovation .",1,BJP,2014,2 15 | 117,Most of the plans had been finalized before the Congress demitted office .,0,INC,2004,2 16 | 963,The manifesto is an opportunity for a political party to present its agenda for the future embodying the loftiest hopes and noblest aspirations of the people .,0,INC,1999,2 17 | 117,It has launched the National Rural Health Mission which has already made a positive impact by improving the quality and accessibility of primary health care in villages .,0,INC,2009,2 18 | 343,"There shall be a special survey, which will be undertaken once every five years to estimate, on select development indicators, the gap in the development of these groups .",1,INC,2014,2 19 | 450,Launch a National Digital Highway Development Project to bring affordable broadband Internet connectivity to every village .,1,BJP,2009,2 20 | 256,Dispense with clubbing of agricultural income with other sources of income for determining tax liability on other income .,1,BJP,2009,2 21 | 863,"PHCs will provide all primary health services, including preventive measures and wellness services, and become referral centres for serious medical cases .",0,INC,2019,2 22 | 150,The recently 
established National Security Council will advise the government in this regard and also in the establishment of a credible nuclear deterrence .,0,BJP,1999,2 23 | 948,Congress will work with State Governments to ensure that a child receives a good quality education and that this is reflected in learning outcomes .,0,INC,2019,2 24 | 69,"It is, as has become painfully evident, not confined to any community or any political persuasion .",0,INC,2009,2 25 | 481,"9 . We will set up a “National Panchayati Raj Commission”, with branches in each State and Union Territory, so that :",1,INC,2014,2 26 | 354,The BJP will set up an experts committee to deal with the following issues :,1,BJP,2009,2 27 | 18,We aspire to be the world's third largest economy by 2030 .,0,BJP,2019,2 28 | 197,Deployment of broadband in every village would be a thrust area .,1,BJP,2014,2 29 | 210,The Credit Guarantee Scheme of the Government of India is an important component wherein loans to MSME are guaranteed .,1,BJP,2019,2 30 | 572,We will offer higher interest rates on fixed deposits by divyangs .,1,BJP,2019,2 31 | 700,"Working of regulatory bodies, which oversee medical education in the country, will be reviewed to improve standards .",0,BJP,2004,2 32 | 503,MGNREGA will also be harnessed to support the construction of poultry shelters and water bodies for fisheries .,1,INC,2014,2 33 | 36,We reach out to the minorities and even at the cost of repetition proclaim that we will safeguard the rights as enshrined in our Constitution .,0,BJP,1999,2 34 | 241,3 . 
Announce a detailed Jobs Agenda to ensure that we create 10 crore new jobs and entrepreneurship opportunities for our youth .,1,INC,2014,2 35 | 107,"India will engage with the world in the global war on terror while not compromising on its domestic interests, primarily protecting citizens from the ravages of terrorism .",0,BJP,2009,2 36 | 738,Saving Rama Setu is to save the vast thorium deposits which are the future source of our energy .,0,BJP,2009,2 37 | 523,We propose to take strong measures to promote the manufacturing industry .,0,INC,2014,2 38 | 216,The Congress will ensure the fullest implementation of minimum wage laws for farm labour .,1,INC,2004,2 39 | 288,The Indian National Congress will bring school curricula of various communal and sectarian organizations — regardless of their affiliation —under the regulatory purview of an empowered national body .,1,INC,2009,2 40 | 140,The present system of setting Minimum Support Prices by the Commission on Agricultural Costs and Prices will be reviewed to further benefit all kisans .,0,BJP,2004,2 41 | 404,"A ‘Vishwakarma Initiative’ will be launched for craftspersons and rural artisans to preserve their traditional skills and knowledge, upgrade them, and adapt to new challenges .",1,BJP,2004,2 42 | 44,"We want an India which we all feel part of, in whose future we all have a stake .",0,BJP,1999,2 43 | 775,"But at the same time, governance in these states has to improve vastly .",0,INC,1999,2 44 | 890,"The NDA Government envisions a future that rests on a cooperative multipolar world order, with India as one of the poles .",0,BJP,2004,2 45 | 514,"Continuing our work, we will legislate a bill to prohibit and eliminate practices such as Triple Talaq and Nikah Halala .",1,BJP,2019,2 46 | 146,"Flawed design, inefficient execution, insufficient capacity and poor maintenance of infrastructure have dragged India’s growth rate down .",0,INC,2019,2 47 | 140,The sustained campaign led by the Congress President has resulted 
in the declaration of Gandhi Jayanti as International Day of Non - Violence by the United Nations .,0,INC,2009,2 48 | 200,"Along the lines of NREGA, we will enact a National Food Security Act .",1,INC,2009,2 49 | 675,21 Ensure the LPG gas cylinder connection to all poor rural households .,0,BJP,2019,2 50 | 164,a . We will enact central legislation on the Scheduled Castes and Scheduled Tribes Sub Plans to ensure focused spending of funds on weaker sections .,1,INC,2014,2 51 | 88,"Launch a massive programme to detect, detain and deport illegal immigrants .",1,BJP,2009,2 52 | -------------------------------------------------------------------------------- /data/labeled/fornaciari_we_2021/annotations/classification/group1/buttler.dominik.csv: -------------------------------------------------------------------------------- 1 | id,text,split_,text_id,metadata__year,metadata__party,label 2 | 133835,"Our sports policy will aim to inculcate in our citizens, especially students and youth, the culture of sports and fitness and will encourage sports as a career .",0,1030,2019,INC,Not Pledge 3 | 133836,Congress promises to work with industry to increase the expenditure on science and technology to 2 per cent of GDP .,0,406,2019,INC,Pledge 4 | 133837,We will recognise the 11 left out Indian Gorkha sub - tribes as Schedule Tribes .,0,574,2019,BJP,Pledge 5 | 133838,"A national programme will be launched, in cooperation with State Governments, to provide bicycles to girls from Below Poverty Line Families who attend school .",0,502,2009,BJP,Pledge 6 | 133839,Encouraging the production of cereals and discouraging the conversion of fertile farm land for dubious industrial projects .,0,228,2009,BJP,Not Pledge 7 | 133840,The number of courts and the number of judges will be doubled in five years for quicker judicial process .,0,773,2004,BJP,Pledge 8 | 133841,The Ministry of Finance will be directed to provide all required resources to implement this program in a time - bound manner 
.,0,323,2004,BJP,Not Pledge 9 | 133842,We will restrict foreign equity holding in private television broadcasting to 20% (and prevent cross holding to avoid emergence of monopolies in the media) .,0,185,1999,BJP,Pledge 10 | 133843,"India’s indigenous thorium technology programme will be expedited and given all financial assistance, correcting the grievous wrong done by the UPA Government .",0,197,2009,BJP,Not Pledge 11 | 133844,Small industry will be particularly encouraged in states and regions where the potential for large or heavy industry is limited .,0,212,1999,INC,Not Pledge 12 | 133845,This will promote competition and enhance efficiency in procurement processes .,0,681,2014,INC,Not Pledge 13 | 133846,The Planning Commission will be reformed and reorganized in light of the changing developmental needs of our country .,0,799,2004,BJP,Pledge 14 | 133847,"The Total Sanitation Campaign, launched by the NDA Government in 1999, has been a remarkable success .",0,218,2004,BJP,Not Pledge 15 | 133848,"It will enable industry to access agriculture produce directly from farmers, and suggest ways to step up exports, including to organized retail outlets abroad .",0,148,2004,BJP,Not Pledge 16 | 133849,The emphasis in all foreign investment policies will be maximization of local value - addition and export potential .,0,331,2009,INC,Not Pledge 17 | 133850,Involve the state Governments in the promotion of foreign trade and commerce .,0,92,2014,BJP,Not Pledge 18 | 133851,"Immediately after forming the governments in Chhattisgarh, Madhya Pradesh and Rajasthan, as promised, the 3 Congress Governments waived the loans of farmers .",0,207,2019,INC,Not Pledge 19 | 133852,"We will introduce the goods and services tax from April 1, 2010 .",0,335,2009,INC,Pledge 20 | 133853,"Every consumer of electricity in India, including farmers, would be connected through digital, tamper - proof meters in the next three years .",0,336,2004,BJP,Not Pledge 21 | 133854,"Highest priority would be 
given to address the acute shortage of teachers and researchers, quality of education and research, and also the employability factor associated with most of the courses .",0,490,2014,BJP,Not Pledge 22 | 133855,The Indian National Congress has endeavoured to provide quality public health services to all citizens .,0,288,2014,INC,Not Pledge 23 | 133856,"They are, in fact, responsible for the electoral growth of the BJP .",0,62,2009,INC,Not Pledge 24 | 133857,A raw material use policy will be unveiled in the mines sector .,0,370,2004,BJP,Not Pledge 25 | 133858,More specialist battalions will be raised and positioned in key locations across the country .,0,181,2009,INC,Not Pledge 26 | 133859,The Congress will identify those environmental management functions that could be delegated to the states and local bodies .,0,718,1999,INC,Pledge 27 | 133860,"The India of tomorrow will have 125 crore such dreams, and will be built on the same .",0,279,2014,BJP,Not Pledge 28 | 133861,These institutions would provide four - year integrated courses that would set the standards for quality teachers in our schools .,0,469,2019,BJP,Not Pledge 29 | 133862,A detailed roadmap for accomplishing this will be unveiled within 30 days of coming to power so that a national consensus is also created .,0,398,2004,INC,Pledge 30 | 133863,"Along with vastly expanded credit facilities for self - employment, the services industry will be given all support to fulfill its true employment potential .",0,189,2004,INC,Not Pledge 31 | 133864,"Art, culture and heritage constitute the identity of a people .",0,515,2019,INC,Not Pledge 32 | 133865,"Make potable drinking water available to all thus reducing water - borne diseases, which will automatically translate into Diarrhoea - free India .",0,587,2014,BJP,Pledge 33 | 133866,"Faced by this aggression in Kargil, the response of the Government was swift, though measured .",0,9,1999,BJP,Not Pledge 34 | 133867,Targeting time spent for tax compliance at 1 hour 
per month .,0,227,2019,BJP,Not Pledge 35 | 133868,We will also motivate States to replicate this model as it is our firm belief that quality teachers lay the foundation of quality learning .,0,470,2019,BJP,Not Pledge 36 | 133869,We will extensively use technology to ensure a better knowledge of the market prices of various agro - products for the benefit of farmers .,0,140,2019,BJP,Not Pledge 37 | 133870,This is the moment to consolidate all forces subscribing to the fundamental values of our Constitution .,0,17,2004,INC,Not Pledge 38 | 133871,Antyodaya cards for all households at risk of hunger will be introduced .,0,287,2004,INC,Pledge 39 | 133872,"In consonance with its policy, the BJP supports the creation of Telangana as a separate State of the Union of India .",0,687,2009,BJP,Not Pledge 40 | 133873,Article 370 poses a psychological barrier for the full integration of the people of Jammu & Kashmir with the national mainstream .,0,682,2009,BJP,Not Pledge 41 | 133874,Reservations for the poor among ‘Forward Classes’ will be introduced after receiving recommendations of the Commission set up for this purpose .,0,604,2004,BJP,Pledge 42 | 133875,"4 lakh crores in lost output, lakhs of jobs and greater indebtedness .",0,58,2019,INC,Not Pledge 43 | 133876,11 . We will strengthen the legal and institutional framework to protect our children .,0,168,2014,INC,Not Pledge 44 | 133877,Joint projects in the energy sector will be actively explored .,0,938,1999,INC,Not Pledge 45 | 133878,a . We will ensure the passage of the Women’s Reservation Bill .,0,160,2014,INC,Pledge 46 | 133879,"The Rs . 
1,000 - crore Sampoorna Grameen Rozgar Yojana, started by our Government, is the biggest food for - work program since Independence .",0,42,2004,BJP,Not Pledge 47 | 133880,"While the bulk of our population still lives and works in villages, India is rapidly urbanizing .",0,343,2009,INC,Not Pledge 48 | 133881,"New middle - level technical institutes in clusters where, for example, weavers and artisans are concentrated, will be started .",0,423,1999,INC,Pledge 49 | 133882,"In the last 5 years under the BJP Government, hate crimes and atrocities against the minorities and other vulnerable sections of the people have increased manifold .",0,788,2019,INC,Not Pledge 50 | 133883,This shall be responsible for setting and enforcing standards for all food products .,0,157,2004,BJP,Not Pledge 51 | 133884,We will also introduce a multi - purpose identity card for all citizens .,0,144,1999,BJP,Pledge 52 | -------------------------------------------------------------------------------- /data/labeled/fornaciari_we_2021/annotations/classification/group1/jonas.bruder.csv: -------------------------------------------------------------------------------- 1 | id,text,split_,text_id,metadata__year,metadata__party,label 2 | 133835,"Our sports policy will aim to inculcate in our citizens, especially students and youth, the culture of sports and fitness and will encourage sports as a career .",0,1030,2019,INC,Not Pledge 3 | 133836,Congress promises to work with industry to increase the expenditure on science and technology to 2 per cent of GDP .,0,406,2019,INC,Pledge 4 | 133837,We will recognise the 11 left out Indian Gorkha sub - tribes as Schedule Tribes .,0,574,2019,BJP,Pledge 5 | 133838,"A national programme will be launched, in cooperation with State Governments, to provide bicycles to girls from Below Poverty Line Families who attend school .",0,502,2009,BJP,Pledge 6 | 133839,Encouraging the production of cereals and discouraging the conversion of fertile farm land for dubious 
industrial projects .,0,228,2009,BJP,Not Pledge 7 | 133840,The number of courts and the number of judges will be doubled in five years for quicker judicial process .,0,773,2004,BJP,Pledge 8 | 133841,The Ministry of Finance will be directed to provide all required resources to implement this program in a time - bound manner .,0,323,2004,BJP,Pledge 9 | 133842,We will restrict foreign equity holding in private television broadcasting to 20% (and prevent cross holding to avoid emergence of monopolies in the media) .,0,185,1999,BJP,Pledge 10 | 133843,"India’s indigenous thorium technology programme will be expedited and given all financial assistance, correcting the grievous wrong done by the UPA Government .",0,197,2009,BJP,Not Pledge 11 | 133844,Small industry will be particularly encouraged in states and regions where the potential for large or heavy industry is limited .,0,212,1999,INC,Not Pledge 12 | 133845,This will promote competition and enhance efficiency in procurement processes .,0,681,2014,INC,Not Pledge 13 | 133846,The Planning Commission will be reformed and reorganized in light of the changing developmental needs of our country .,0,799,2004,BJP,Pledge 14 | 133847,"The Total Sanitation Campaign, launched by the NDA Government in 1999, has been a remarkable success .",0,218,2004,BJP,Not Pledge 15 | 133848,"It will enable industry to access agriculture produce directly from farmers, and suggest ways to step up exports, including to organized retail outlets abroad .",0,148,2004,BJP,Not Pledge 16 | 133849,The emphasis in all foreign investment policies will be maximization of local value - addition and export potential .,0,331,2009,INC,Not Pledge 17 | 133850,Involve the state Governments in the promotion of foreign trade and commerce .,0,92,2014,BJP,Pledge 18 | 133851,"Immediately after forming the governments in Chhattisgarh, Madhya Pradesh and Rajasthan, as promised, the 3 Congress Governments waived the loans of farmers .",0,207,2019,INC,Not Pledge 19 | 
133852,"We will introduce the goods and services tax from April 1, 2010 .",0,335,2009,INC,Pledge 20 | 133853,"Every consumer of electricity in India, including farmers, would be connected through digital, tamper - proof meters in the next three years .",0,336,2004,BJP,Not Pledge 21 | 133854,"Highest priority would be given to address the acute shortage of teachers and researchers, quality of education and research, and also the employability factor associated with most of the courses .",0,490,2014,BJP,Not Pledge 22 | 133855,The Indian National Congress has endeavoured to provide quality public health services to all citizens .,0,288,2014,INC,Not Pledge 23 | 133856,"They are, in fact, responsible for the electoral growth of the BJP .",0,62,2009,INC,Not Pledge 24 | 133857,A raw material use policy will be unveiled in the mines sector .,0,370,2004,BJP,Pledge 25 | 133858,More specialist battalions will be raised and positioned in key locations across the country .,0,181,2009,INC,Pledge 26 | 133859,The Congress will identify those environmental management functions that could be delegated to the states and local bodies .,0,718,1999,INC,Pledge 27 | 133860,"The India of tomorrow will have 125 crore such dreams, and will be built on the same .",0,279,2014,BJP,Not Pledge 28 | 133861,These institutions would provide four - year integrated courses that would set the standards for quality teachers in our schools .,0,469,2019,BJP,Not Pledge 29 | 133862,A detailed roadmap for accomplishing this will be unveiled within 30 days of coming to power so that a national consensus is also created .,0,398,2004,INC,Pledge 30 | 133863,"Along with vastly expanded credit facilities for self - employment, the services industry will be given all support to fulfill its true employment potential .",0,189,2004,INC,Not Pledge 31 | 133864,"Art, culture and heritage constitute the identity of a people .",0,515,2019,INC,Not Pledge 32 | 133865,"Make potable drinking water available to all thus 
reducing water - borne diseases, which will automatically translate into Diarrhoea - free India .",0,587,2014,BJP,Pledge 33 | 133866,"Faced by this aggression in Kargil, the response of the Government was swift, though measured .",0,9,1999,BJP,Not Pledge 34 | 133867,Targeting time spent for tax compliance at 1 hour per month .,0,227,2019,BJP,Pledge 35 | 133868,We will also motivate States to replicate this model as it is our firm belief that quality teachers lay the foundation of quality learning .,0,470,2019,BJP,Not Pledge 36 | 133869,We will extensively use technology to ensure a better knowledge of the market prices of various agro - products for the benefit of farmers .,0,140,2019,BJP,Not Pledge 37 | 133870,This is the moment to consolidate all forces subscribing to the fundamental values of our Constitution .,0,17,2004,INC,Not Pledge 38 | 133871,Antyodaya cards for all households at risk of hunger will be introduced .,0,287,2004,INC,Pledge 39 | 133872,"In consonance with its policy, the BJP supports the creation of Telangana as a separate State of the Union of India .",0,687,2009,BJP,Not Pledge 40 | 133873,Article 370 poses a psychological barrier for the full integration of the people of Jammu & Kashmir with the national mainstream .,0,682,2009,BJP,Not Pledge 41 | 133874,Reservations for the poor among ‘Forward Classes’ will be introduced after receiving recommendations of the Commission set up for this purpose .,0,604,2004,BJP,Pledge 42 | 133875,"4 lakh crores in lost output, lakhs of jobs and greater indebtedness .",0,58,2019,INC,Not Pledge 43 | 133876,11 . We will strengthen the legal and institutional framework to protect our children .,0,168,2014,INC,Pledge 44 | 133877,Joint projects in the energy sector will be actively explored .,0,938,1999,INC,Pledge 45 | 133878,a . We will ensure the passage of the Women’s Reservation Bill .,0,160,2014,INC,Pledge 46 | 133879,"The Rs . 
1,000 - crore Sampoorna Grameen Rozgar Yojana, started by our Government, is the biggest food for - work program since Independence .",0,42,2004,BJP,Not Pledge 47 | 133880,"While the bulk of our population still lives and works in villages, India is rapidly urbanizing .",0,343,2009,INC,Not Pledge 48 | 133881,"New middle - level technical institutes in clusters where, for example, weavers and artisans are concentrated, will be started .",0,423,1999,INC,Pledge 49 | 133882,"In the last 5 years under the BJP Government, hate crimes and atrocities against the minorities and other vulnerable sections of the people have increased manifold .",0,788,2019,INC,Not Pledge 50 | 133883,This shall be responsible for setting and enforcing standards for all food products .,0,157,2004,BJP,Not Pledge 51 | 133884,We will also introduce a multi - purpose identity card for all citizens .,0,144,1999,BJP,Pledge 52 | -------------------------------------------------------------------------------- /data/labeled/fornaciari_we_2021/annotations/classification/group1/samuel.droessler.csv: -------------------------------------------------------------------------------- 1 | id,text,split_,text_id,metadata__year,metadata__party,label 2 | 133835,"Our sports policy will aim to inculcate in our citizens, especially students and youth, the culture of sports and fitness and will encourage sports as a career .",0,1030,2019,INC,Pledge 3 | 133836,Congress promises to work with industry to increase the expenditure on science and technology to 2 per cent of GDP .,0,406,2019,INC,Pledge 4 | 133837,We will recognise the 11 left out Indian Gorkha sub - tribes as Schedule Tribes .,0,574,2019,BJP,Pledge 5 | 133838,"A national programme will be launched, in cooperation with State Governments, to provide bicycles to girls from Below Poverty Line Families who attend school .",0,502,2009,BJP,Pledge 6 | 133839,Encouraging the production of cereals and discouraging the conversion of fertile farm land for dubious 
industrial projects .,0,228,2009,BJP,Pledge 7 | 133840,The number of courts and the number of judges will be doubled in five years for quicker judicial process .,0,773,2004,BJP,Pledge 8 | 133841,The Ministry of Finance will be directed to provide all required resources to implement this program in a time - bound manner .,0,323,2004,BJP,Not Pledge 9 | 133842,We will restrict foreign equity holding in private television broadcasting to 20% (and prevent cross holding to avoid emergence of monopolies in the media) .,0,185,1999,BJP,Pledge 10 | 133843,"India’s indigenous thorium technology programme will be expedited and given all financial assistance, correcting the grievous wrong done by the UPA Government .",0,197,2009,BJP,Pledge 11 | 133844,Small industry will be particularly encouraged in states and regions where the potential for large or heavy industry is limited .,0,212,1999,INC,Pledge 12 | 133845,This will promote competition and enhance efficiency in procurement processes .,0,681,2014,INC,Pledge 13 | 133846,The Planning Commission will be reformed and reorganized in light of the changing developmental needs of our country .,0,799,2004,BJP,Pledge 14 | 133847,"The Total Sanitation Campaign, launched by the NDA Government in 1999, has been a remarkable success .",0,218,2004,BJP,Not Pledge 15 | 133848,"It will enable industry to access agriculture produce directly from farmers, and suggest ways to step up exports, including to organized retail outlets abroad .",0,148,2004,BJP,Pledge 16 | 133849,The emphasis in all foreign investment policies will be maximization of local value - addition and export potential .,0,331,2009,INC,Not Pledge 17 | 133850,Involve the state Governments in the promotion of foreign trade and commerce .,0,92,2014,BJP,Pledge 18 | 133851,"Immediately after forming the governments in Chhattisgarh, Madhya Pradesh and Rajasthan, as promised, the 3 Congress Governments waived the loans of farmers .",0,207,2019,INC,Not Pledge 19 | 133852,"We will 
introduce the goods and services tax from April 1, 2010 .",0,335,2009,INC,Pledge 20 | 133853,"Every consumer of electricity in India, including farmers, would be connected through digital, tamper - proof meters in the next three years .",0,336,2004,BJP,Pledge 21 | 133854,"Highest priority would be given to address the acute shortage of teachers and researchers, quality of education and research, and also the employability factor associated with most of the courses .",0,490,2014,BJP,Not Pledge 22 | 133855,The Indian National Congress has endeavoured to provide quality public health services to all citizens .,0,288,2014,INC,Not Pledge 23 | 133856,"They are, in fact, responsible for the electoral growth of the BJP .",0,62,2009,INC,Not Pledge 24 | 133857,A raw material use policy will be unveiled in the mines sector .,0,370,2004,BJP,Pledge 25 | 133858,More specialist battalions will be raised and positioned in key locations across the country .,0,181,2009,INC,Pledge 26 | 133859,The Congress will identify those environmental management functions that could be delegated to the states and local bodies .,0,718,1999,INC,Pledge 27 | 133860,"The India of tomorrow will have 125 crore such dreams, and will be built on the same .",0,279,2014,BJP,Not Pledge 28 | 133861,These institutions would provide four - year integrated courses that would set the standards for quality teachers in our schools .,0,469,2019,BJP,Pledge 29 | 133862,A detailed roadmap for accomplishing this will be unveiled within 30 days of coming to power so that a national consensus is also created .,0,398,2004,INC, 30 | 133863,"Along with vastly expanded credit facilities for self - employment, the services industry will be given all support to fulfill its true employment potential .",0,189,2004,INC,Pledge 31 | 133864,"Art, culture and heritage constitute the identity of a people .",0,515,2019,INC,Not Pledge 32 | 133865,"Make potable drinking water available to all thus reducing water - borne diseases, which 
will automatically translate into Diarrhoea - free India .",0,587,2014,BJP,Pledge 33 | 133866,"Faced by this aggression in Kargil, the response of the Government was swift, though measured .",0,9,1999,BJP,Not Pledge 34 | 133867,Targeting time spent for tax compliance at 1 hour per month .,0,227,2019,BJP,Pledge 35 | 133868,We will also motivate States to replicate this model as it is our firm belief that quality teachers lay the foundation of quality learning .,0,470,2019,BJP,Pledge 36 | 133869,We will extensively use technology to ensure a better knowledge of the market prices of various agro - products for the benefit of farmers .,0,140,2019,BJP,Pledge 37 | 133870,This is the moment to consolidate all forces subscribing to the fundamental values of our Constitution .,0,17,2004,INC,Not Pledge 38 | 133871,Antyodaya cards for all households at risk of hunger will be introduced .,0,287,2004,INC,Pledge 39 | 133872,"In consonance with its policy, the BJP supports the creation of Telangana as a separate State of the Union of India .",0,687,2009,BJP,Pledge 40 | 133873,Article 370 poses a psychological barrier for the full integration of the people of Jammu & Kashmir with the national mainstream .,0,682,2009,BJP,Not Pledge 41 | 133874,Reservations for the poor among ‘Forward Classes’ will be introduced after receiving recommendations of the Commission set up for this purpose .,0,604,2004,BJP,Pledge 42 | 133875,"4 lakh crores in lost output, lakhs of jobs and greater indebtedness .",0,58,2019,INC,Not Pledge 43 | 133876,11 . We will strengthen the legal and institutional framework to protect our children .,0,168,2014,INC,Pledge 44 | 133877,Joint projects in the energy sector will be actively explored .,0,938,1999,INC,Pledge 45 | 133878,a . We will ensure the passage of the Women’s Reservation Bill .,0,160,2014,INC,Pledge 46 | 133879,"The Rs . 
1,000 - crore Sampoorna Grameen Rozgar Yojana, started by our Government, is the biggest food for - work program since Independence .",0,42,2004,BJP,Not Pledge 47 | 133880,"While the bulk of our population still lives and works in villages, India is rapidly urbanizing .",0,343,2009,INC,Not Pledge 48 | 133881,"New middle - level technical institutes in clusters where, for example, weavers and artisans are concentrated, will be started .",0,423,1999,INC,Pledge 49 | 133882,"In the last 5 years under the BJP Government, hate crimes and atrocities against the minorities and other vulnerable sections of the people have increased manifold .",0,788,2019,INC,Not Pledge 50 | 133883,This shall be responsible for setting and enforcing standards for all food products .,0,157,2004,BJP,Not Pledge 51 | 133884,We will also introduce a multi - purpose identity card for all citizens .,0,144,1999,BJP,Pledge 52 | -------------------------------------------------------------------------------- /data/labeled/fornaciari_we_2021/annotations/classification/group2/antonia.granser.csv: -------------------------------------------------------------------------------- 1 | id,text,split_,text_id,metadata__year,metadata__party,label 2 | 133885,"Mahila Sashaktikaran, to provide for the political empowerment and full educational, economic and legal equality for women ;",1,164,2004,INC,Pledge 3 | 133886,We will : set up Gas Grids to make gas available to households and industry .,1,761,2014,BJP,Pledge 4 | 133887,For reaching this goal we will take effective steps to create a riot - free order and a terrorism - free India .,1,156,1999,BJP,No Pledge 5 | 133888,We will take the Mission to a new level through sustainable Solid Waste Management in every village .,1,276,2019,BJP,Pledge 6 | 133889,All controls on the free movement of farm commodities and processing of agricultural products and all regulations that depress incomes of farmers will be systematically eliminated .,1,241,2009,INC,Pledge 7 | 
133890,The Court of Appeal will sit in multiple Benches of 3 judges each in 6 locations .,1,627,2019,INC,No Pledge 8 | 133891,4 . We will provide skills training to 10 crore youth and provide them with employment opportunities over the next 5 years .,1,147,2014,INC,No Pledge 9 | 133892,The Indian National Congress is committed to ensuring these projects are completed in a time - bound manner .,1,566,2014,INC,No Pledge 10 | 133893,All efforts will be made for the eradication of filariasis within five years .,1,682,2004,BJP,Pledge 11 | 133894,"On the other side is an ideology of hate, bigotry and prejudice, an ideology that negates the very idea of India that has been cherished by us for centuries .",1,780,2014,INC,No Pledge 12 | 133895,A wider and more comprehensive spectrum of skills will be imparted .,1,228,2009,INC,No Pledge 13 | 133896,"For this, the government shall introduce time - bound programmes of needed administrative reforms including those for the Police and other Civil Services .",1,50,1999,BJP,Pledge 14 | 133897,We will also enact a comprehensive Broadcasting Bill to regulate private broadcasting and to protect Indian interests .,1,184,1999,BJP,Pledge 15 | 133898,"An initial set of identified schemes will be implemented, with public participation, by 2015 .",1,315,2004,BJP,No Pledge 16 | 133899,The Indian National Congress has declared the sacred Ganga as a “national river” .,1,360,2009,INC,No Pledge 17 | 133900,A time to reestablish the core values that have made India unique .,1,1,2004,INC,No Pledge 18 | 133901,National scholarships for boys and girls belonging to scheduled castes and scheduled tribes will be further increased .,1,255,2009,INC,Pledge 19 | 133902,"More funds will be allocated to sports, and we will encourage the State Governments to fully discharge their responsibility in the promotion of sports .",1,442,2014,BJP,Pledge 20 | 133903,"Like it did for panchayats, the Congress will bring forward a Constitutional amendment to ensure the 
democratic, autonomous and professional functioning of all cooperatives .",1,220,2004,INC,No Pledge 21 | 133904,"After agriculture, the retail sector is the largest employer of nearly four crore people .",1,333,2009,BJP,No Pledge 22 | 133905,"Key leverage technologies such as information technology, biotechnology and materials science and technology would be given special importance .",1,425,2009,BJP,No Pledge 23 | 133906,"We will implement a solid waste management plan in every habitation, village, town and city employing modern technology and machinery .",1,996,2019,INC,Pledge 24 | 133907,It means that we will facilitate the domestic industry to gain enough muscles to compete with the multinationals in the local and global markets .,1,57,1999,BJP,No Pledge 25 | 133908,"A new national movement for sanitation and hygiene, along the lines launched by Gandhijl during the Freedom movement will now be started and spearheaded by the Congress .",1,348,1999,INC,Pledge 26 | 133909,It also pledges a major programme for training of teachers and improving the physical environment in schools .,1,219,2009,INC,No Pledge 27 | 133910,We appeal for the cooperation of all parties and all sections of society in this great endeavour .,1,203,1999,BJP,No Pledge 28 | 133911,"New international airport projects for Delhi, Mumbai, Bangalore and Hyderabad will be completed within the next three to four years and the Prime Minister will inaugurate them before August 15, 2008 .",1,290,2004,BJP,Pledge 29 | 133912,"We will enact the Lok Pal Bill with adequate powers to deal with corruption charges against anyone, including the Prime Minister .",1,146,1999,BJP,Pledge 30 | 133913,Our target is to achieve at least $10 billion per year which will commensurate with our growth objectives .,1,60,1999,BJP,No Pledge 31 | 133914,The rate of both public and private investment has declined over the period that the BJP - led NDA government has been in office .,1,112,2004,INC,No Pledge 32 | 133915,We will 
undertake all necessary legislative and administrative measures to ensure the right of franchise of the Armed Forces through proxy voting and or any other method .,1,145,1999,BJP,Pledge 33 | 133916,"However, they have been facing many problems with rapid urbanization and changes in the traditional joint families .",1,758,2004,BJP,No Pledge 34 | 133917,We will amend the Service Rules to reserve for women 33 per cent of appointments to posts in the Central Government .,1,697,2019,INC,Pledge 35 | 133918,"Space research, peaceful uses of nuclear energy, and civilian applications of defense research will be further promoted, building on India’s self - reliant strides in these areas .",1,433,2004,BJP,Pledge 36 | 133919,Congress reiterates its firm belief in a high rate of growth of GDP .,1,262,2019,INC,No Pledge 37 | 133920,This must be reinforced by other programmes for the development and diversification of the rural economy .,1,270,1999,INC,No Pledge 38 | 133921,"We will ensure a 'Swachh Bharat' by Gandhiji's 150th birth anniversary in 2019, taking it up in mission mode by converging resources and building around jan bhagidari :",1,583,2014,BJP,No Pledge 39 | 133922,The Indian National Congress also proposes to reserve one - third of all central government jobs for women .,1,278,2009,INC,Pledge 40 | 133923,One of the main reasons for this is the absence of proper urban and rural sanitation and poor liquid and solid waste management .,1,346,1999,INC,No Pledge 41 | 133924,"We have constructed over 9 crore toilets under our flagship programme, Swachh Bharat Mission .",1,275,2019,BJP,No Pledge 42 | 133925,Fiscal incentives to promote employment - intensive growth will be introduced .,1,175,2004,INC,Pledge 43 | 133926,"We are committed to establishing a civilised, humane and just civil order ; that which does not discriminate on ground — of caste, religion, class, colour, race or sex .",1,166,1999,BJP,Pledge 44 | 133927,It is the Congress that began the process of 
extending social security to the 93 per cent work force that subsists In the unorganized sector .,1,46,2004,INC,No Pledge 45 | 133928,Exempt income up to Rs 3 lakh from Income Tax .,1,252,2009,BJP,Pledge 46 | 133929,"We will further encourage integrated development of coastal areas including coastal cities, coastal transport and coastal industrialization .",1,308,2019,BJP,Pledge 47 | 133930,"The Indian National Congress is the only party that combines experience and youth, wisdom and exuberance, achievement and ambition .",1,12,2009,INC,No Pledge 48 | 133931,"Congress promises an adequately capitalised Tourism Development Bank to provide low - cost, long - term funds for investment in tourism - related businesses .",1,119,2019,INC,Pledge 49 | 133932,The Indian National Congress also acknowledges the vital role remittances by overseas Indians play in bolstering the country’s finances .,1,417,2009,INC,No Pledge 50 | 133933,"At the same time the country cannot do without FDI because besides capital stock it brings with it technology, new market practices and most importantly employment .",1,59,1999,BJP,No Pledge 51 | 133934,"New godowns, storage facilities, cold storage nehvorks and access roads will get priority .",1,244,1999,INC,Pledge 52 | -------------------------------------------------------------------------------- /data/labeled/fornaciari_we_2021/annotations/classification/group2/l.j.herbig.csv: -------------------------------------------------------------------------------- 1 | id,text,split_,text_id,metadata__year,metadata__party,label 2 | 133885,"Mahila Sashaktikaran, to provide for the political empowerment and full educational, economic and legal equality for women ;",1,164,2004,INC,No Pledge 3 | 133886,We will : set up Gas Grids to make gas available to households and industry .,1,761,2014,BJP,Pledge 4 | 133887,For reaching this goal we will take effective steps to create a riot - free order and a terrorism - free India .,1,156,1999,BJP,No Pledge 5 | 
133888,We will take the Mission to a new level through sustainable Solid Waste Management in every village .,1,276,2019,BJP,Pledge 6 | 133889,All controls on the free movement of farm commodities and processing of agricultural products and all regulations that depress incomes of farmers will be systematically eliminated .,1,241,2009,INC,Pledge 7 | 133890,The Court of Appeal will sit in multiple Benches of 3 judges each in 6 locations .,1,627,2019,INC,Pledge 8 | 133891,4 . We will provide skills training to 10 crore youth and provide them with employment opportunities over the next 5 years .,1,147,2014,INC,Pledge 9 | 133892,The Indian National Congress is committed to ensuring these projects are completed in a time - bound manner .,1,566,2014,INC,No Pledge 10 | 133893,All efforts will be made for the eradication of filariasis within five years .,1,682,2004,BJP,No Pledge 11 | 133894,"On the other side is an ideology of hate, bigotry and prejudice, an ideology that negates the very idea of India that has been cherished by us for centuries .",1,780,2014,INC,No Pledge 12 | 133895,A wider and more comprehensive spectrum of skills will be imparted .,1,228,2009,INC,No Pledge 13 | 133896,"For this, the government shall introduce time - bound programmes of needed administrative reforms including those for the Police and other Civil Services .",1,50,1999,BJP,Pledge 14 | 133897,We will also enact a comprehensive Broadcasting Bill to regulate private broadcasting and to protect Indian interests .,1,184,1999,BJP,Pledge 15 | 133898,"An initial set of identified schemes will be implemented, with public participation, by 2015 .",1,315,2004,BJP,Pledge 16 | 133899,The Indian National Congress has declared the sacred Ganga as a “national river” .,1,360,2009,INC,No Pledge 17 | 133900,A time to reestablish the core values that have made India unique .,1,1,2004,INC,No Pledge 18 | 133901,National scholarships for boys and girls belonging to scheduled castes and scheduled tribes will be 
further increased .,1,255,2009,INC,Pledge 19 | 133902,"More funds will be allocated to sports, and we will encourage the State Governments to fully discharge their responsibility in the promotion of sports .",1,442,2014,BJP,Pledge 20 | 133903,"Like it did for panchayats, the Congress will bring forward a Constitutional amendment to ensure the democratic, autonomous and professional functioning of all cooperatives .",1,220,2004,INC,Pledge 21 | 133904,"After agriculture, the retail sector is the largest employer of nearly four crore people .",1,333,2009,BJP,No Pledge 22 | 133905,"Key leverage technologies such as information technology, biotechnology and materials science and technology would be given special importance .",1,425,2009,BJP,No Pledge 23 | 133906,"We will implement a solid waste management plan in every habitation, village, town and city employing modern technology and machinery .",1,996,2019,INC,Pledge 24 | 133907,It means that we will facilitate the domestic industry to gain enough muscles to compete with the multinationals in the local and global markets .,1,57,1999,BJP,No Pledge 25 | 133908,"A new national movement for sanitation and hygiene, along the lines launched by Gandhijl during the Freedom movement will now be started and spearheaded by the Congress .",1,348,1999,INC,Pledge 26 | 133909,It also pledges a major programme for training of teachers and improving the physical environment in schools .,1,219,2009,INC,Pledge 27 | 133910,We appeal for the cooperation of all parties and all sections of society in this great endeavour .,1,203,1999,BJP,No Pledge 28 | 133911,"New international airport projects for Delhi, Mumbai, Bangalore and Hyderabad will be completed within the next three to four years and the Prime Minister will inaugurate them before August 15, 2008 .",1,290,2004,BJP,Pledge 29 | 133912,"We will enact the Lok Pal Bill with adequate powers to deal with corruption charges against anyone, including the Prime Minister 
.",1,146,1999,BJP,Pledge 30 | 133913,Our target is to achieve at least $10 billion per year which will commensurate with our growth objectives .,1,60,1999,BJP,Pledge 31 | 133914,The rate of both public and private investment has declined over the period that the BJP - led NDA government has been in office .,1,112,2004,INC,No Pledge 32 | 133915,We will undertake all necessary legislative and administrative measures to ensure the right of franchise of the Armed Forces through proxy voting and or any other method .,1,145,1999,BJP,Pledge 33 | 133916,"However, they have been facing many problems with rapid urbanization and changes in the traditional joint families .",1,758,2004,BJP,No Pledge 34 | 133917,We will amend the Service Rules to reserve for women 33 per cent of appointments to posts in the Central Government .,1,697,2019,INC,Pledge 35 | 133918,"Space research, peaceful uses of nuclear energy, and civilian applications of defense research will be further promoted, building on India’s self - reliant strides in these areas .",1,433,2004,BJP,Pledge 36 | 133919,Congress reiterates its firm belief in a high rate of growth of GDP .,1,262,2019,INC,No Pledge 37 | 133920,This must be reinforced by other programmes for the development and diversification of the rural economy .,1,270,1999,INC,No Pledge 38 | 133921,"We will ensure a 'Swachh Bharat' by Gandhiji's 150th birth anniversary in 2019, taking it up in mission mode by converging resources and building around jan bhagidari :",1,583,2014,BJP,Pledge 39 | 133922,The Indian National Congress also proposes to reserve one - third of all central government jobs for women .,1,278,2009,INC,No Pledge 40 | 133923,One of the main reasons for this is the absence of proper urban and rural sanitation and poor liquid and solid waste management .,1,346,1999,INC,No Pledge 41 | 133924,"We have constructed over 9 crore toilets under our flagship programme, Swachh Bharat Mission .",1,275,2019,BJP,No Pledge 42 | 133925,Fiscal incentives 
to promote employment - intensive growth will be introduced .,1,175,2004,INC,Pledge 43 | 133926,"We are committed to establishing a civilised, humane and just civil order ; that which does not discriminate on ground — of caste, religion, class, colour, race or sex .",1,166,1999,BJP,No Pledge 44 | 133927,It is the Congress that began the process of extending social security to the 93 per cent work force that subsists In the unorganized sector .,1,46,2004,INC,No Pledge 45 | 133928,Exempt income up to Rs 3 lakh from Income Tax .,1,252,2009,BJP,No Pledge 46 | 133929,"We will further encourage integrated development of coastal areas including coastal cities, coastal transport and coastal industrialization .",1,308,2019,BJP,Pledge 47 | 133930,"The Indian National Congress is the only party that combines experience and youth, wisdom and exuberance, achievement and ambition .",1,12,2009,INC,No Pledge 48 | 133931,"Congress promises an adequately capitalised Tourism Development Bank to provide low - cost, long - term funds for investment in tourism - related businesses .",1,119,2019,INC,Pledge 49 | 133932,The Indian National Congress also acknowledges the vital role remittances by overseas Indians play in bolstering the country’s finances .,1,417,2009,INC,No Pledge 50 | 133933,"At the same time the country cannot do without FDI because besides capital stock it brings with it technology, new market practices and most importantly employment .",1,59,1999,BJP,No Pledge 51 | 133934,"New godowns, storage facilities, cold storage nehvorks and access roads will get priority .",1,244,1999,INC,No Pledge 52 | -------------------------------------------------------------------------------- /data/labeled/fornaciari_we_2021/annotations/classification/group2/tabea.krauter.csv: -------------------------------------------------------------------------------- 1 | id,text,split_,text_id,metadata__year,metadata__party,label 2 | 133885,"Mahila Sashaktikaran, to provide for the political 
empowerment and full educational, economic and legal equality for women ;",1,164,2004,INC,No Pledge 3 | 133886,We will : set up Gas Grids to make gas available to households and industry .,1,761,2014,BJP,No Pledge 4 | 133887,For reaching this goal we will take effective steps to create a riot - free order and a terrorism - free India .,1,156,1999,BJP,No Pledge 5 | 133888,We will take the Mission to a new level through sustainable Solid Waste Management in every village .,1,276,2019,BJP,No Pledge 6 | 133889,All controls on the free movement of farm commodities and processing of agricultural products and all regulations that depress incomes of farmers will be systematically eliminated .,1,241,2009,INC,No Pledge 7 | 133890,The Court of Appeal will sit in multiple Benches of 3 judges each in 6 locations .,1,627,2019,INC,Pledge 8 | 133891,4 . We will provide skills training to 10 crore youth and provide them with employment opportunities over the next 5 years .,1,147,2014,INC,Pledge 9 | 133892,The Indian National Congress is committed to ensuring these projects are completed in a time - bound manner .,1,566,2014,INC,No Pledge 10 | 133893,All efforts will be made for the eradication of filariasis within five years .,1,682,2004,BJP,No Pledge 11 | 133894,"On the other side is an ideology of hate, bigotry and prejudice, an ideology that negates the very idea of India that has been cherished by us for centuries .",1,780,2014,INC,No Pledge 12 | 133895,A wider and more comprehensive spectrum of skills will be imparted .,1,228,2009,INC,No Pledge 13 | 133896,"For this, the government shall introduce time - bound programmes of needed administrative reforms including those for the Police and other Civil Services .",1,50,1999,BJP,Pledge 14 | 133897,We will also enact a comprehensive Broadcasting Bill to regulate private broadcasting and to protect Indian interests .,1,184,1999,BJP,Pledge 15 | 133898,"An initial set of identified schemes will be implemented, with public 
participation, by 2015 .",1,315,2004,BJP,Pledge 16 | 133899,The Indian National Congress has declared the sacred Ganga as a “national river” .,1,360,2009,INC,No Pledge 17 | 133900,A time to reestablish the core values that have made India unique .,1,1,2004,INC,No Pledge 18 | 133901,National scholarships for boys and girls belonging to scheduled castes and scheduled tribes will be further increased .,1,255,2009,INC,Pledge 19 | 133902,"More funds will be allocated to sports, and we will encourage the State Governments to fully discharge their responsibility in the promotion of sports .",1,442,2014,BJP,Pledge 20 | 133903,"Like it did for panchayats, the Congress will bring forward a Constitutional amendment to ensure the democratic, autonomous and professional functioning of all cooperatives .",1,220,2004,INC,Pledge 21 | 133904,"After agriculture, the retail sector is the largest employer of nearly four crore people .",1,333,2009,BJP,No Pledge 22 | 133905,"Key leverage technologies such as information technology, biotechnology and materials science and technology would be given special importance .",1,425,2009,BJP,No Pledge 23 | 133906,"We will implement a solid waste management plan in every habitation, village, town and city employing modern technology and machinery .",1,996,2019,INC,Pledge 24 | 133907,It means that we will facilitate the domestic industry to gain enough muscles to compete with the multinationals in the local and global markets .,1,57,1999,BJP,Pledge 25 | 133908,"A new national movement for sanitation and hygiene, along the lines launched by Gandhijl during the Freedom movement will now be started and spearheaded by the Congress .",1,348,1999,INC,No Pledge 26 | 133909,It also pledges a major programme for training of teachers and improving the physical environment in schools .,1,219,2009,INC,No Pledge 27 | 133910,We appeal for the cooperation of all parties and all sections of society in this great endeavour .,1,203,1999,BJP,No Pledge 28 | 
133911,"New international airport projects for Delhi, Mumbai, Bangalore and Hyderabad will be completed within the next three to four years and the Prime Minister will inaugurate them before August 15, 2008 .",1,290,2004,BJP,Pledge 29 | 133912,"We will enact the Lok Pal Bill with adequate powers to deal with corruption charges against anyone, including the Prime Minister .",1,146,1999,BJP,Pledge 30 | 133913,Our target is to achieve at least $10 billion per year which will commensurate with our growth objectives .,1,60,1999,BJP,No Pledge 31 | 133914,The rate of both public and private investment has declined over the period that the BJP - led NDA government has been in office .,1,112,2004,INC,No Pledge 32 | 133915,We will undertake all necessary legislative and administrative measures to ensure the right of franchise of the Armed Forces through proxy voting and or any other method .,1,145,1999,BJP,Pledge 33 | 133916,"However, they have been facing many problems with rapid urbanization and changes in the traditional joint families .",1,758,2004,BJP,No Pledge 34 | 133917,We will amend the Service Rules to reserve for women 33 per cent of appointments to posts in the Central Government .,1,697,2019,INC,Pledge 35 | 133918,"Space research, peaceful uses of nuclear energy, and civilian applications of defense research will be further promoted, building on India’s self - reliant strides in these areas .",1,433,2004,BJP,No Pledge 36 | 133919,Congress reiterates its firm belief in a high rate of growth of GDP .,1,262,2019,INC,No Pledge 37 | 133920,This must be reinforced by other programmes for the development and diversification of the rural economy .,1,270,1999,INC,No Pledge 38 | 133921,"We will ensure a 'Swachh Bharat' by Gandhiji's 150th birth anniversary in 2019, taking it up in mission mode by converging resources and building around jan bhagidari :",1,583,2014,BJP,Pledge 39 | 133922,The Indian National Congress also proposes to reserve one - third of all central 
government jobs for women .,1,278,2009,INC,No Pledge 40 | 133923,One of the main reasons for this is the absence of proper urban and rural sanitation and poor liquid and solid waste management .,1,346,1999,INC,No Pledge 41 | 133924,"We have constructed over 9 crore toilets under our flagship programme, Swachh Bharat Mission .",1,275,2019,BJP,No Pledge 42 | 133925,Fiscal incentives to promote employment - intensive growth will be introduced .,1,175,2004,INC,No Pledge 43 | 133926,"We are committed to establishing a civilised, humane and just civil order ; that which does not discriminate on ground — of caste, religion, class, colour, race or sex .",1,166,1999,BJP,No Pledge 44 | 133927,It is the Congress that began the process of extending social security to the 93 per cent work force that subsists In the unorganized sector .,1,46,2004,INC,No Pledge 45 | 133928,Exempt income up to Rs 3 lakh from Income Tax .,1,252,2009,BJP,No Pledge 46 | 133929,"We will further encourage integrated development of coastal areas including coastal cities, coastal transport and coastal industrialization .",1,308,2019,BJP,No Pledge 47 | 133930,"The Indian National Congress is the only party that combines experience and youth, wisdom and exuberance, achievement and ambition .",1,12,2009,INC,No Pledge 48 | 133931,"Congress promises an adequately capitalised Tourism Development Bank to provide low - cost, long - term funds for investment in tourism - related businesses .",1,119,2019,INC,No Pledge 49 | 133932,The Indian National Congress also acknowledges the vital role remittances by overseas Indians play in bolstering the country’s finances .,1,417,2009,INC,No Pledge 50 | 133933,"At the same time the country cannot do without FDI because besides capital stock it brings with it technology, new market practices and most importantly employment .",1,59,1999,BJP,No Pledge 51 | 133934,"New godowns, storage facilities, cold storage nehvorks and access roads will get priority .",1,244,1999,INC,No 
Pledge 52 | -------------------------------------------------------------------------------- /data/labeled/fornaciari_we_2021/annotations/classification/group3/johanneskuhling.csv: -------------------------------------------------------------------------------- 1 | id,text,split_,text_id,metadata__year,metadata__party,label 2 | 133935,10 . Protecting Indians overseas from exploitation or threats will remain a paramount concern of the Indian National Congress .,2,773,2014,INC,No Pledge 3 | 133936,An ‘Extremely Backward Communities Development Bank’ will be set up for promoting skill enhancement through learn - and - earn schemes for their uplift .,2,534,2009,BJP,Pledge 4 | 133937,"Similarly, almost all remaining households have been provided with an electricity connection .",2,313,2019,BJP,No Pledge 5 | 133938,We will expand this initiative further to take the number of beneficiaries of Mudra loans up to 30 crore .,2,444,2019,BJP,Pledge 6 | 133939,"In this sacred endeavour, the Congress has joined hands with like - minded political parties in different states .",2,19,2004,INC,No Pledge 7 | 133940,"The Indian National Congress has always stood for its democratic, autonomous and professional functioning and this will be ensured by enacting appropriate laws, including making a Constitutional provision .",2,249,2009,INC,No Pledge 8 | 133941,We are committed to annulling Article 35A of the Constitution of India as the provision is discriminatory against non - permanent residents and women of Jammu and Kashmir .,2,101,2019,BJP,Pledge 9 | 133942,"A suitable law, enabling micro - credit operations and nurturing them to be scaled up, will be enacted .",2,518,2004,BJP,Pledge 10 | 133943,"Education at all stages — primary, secondary and university — will be free in all respects for boys and girls belonging to dalit and adivasi communities .",2,253,2009,INC,Pledge 11 | 133944,The Indian National Congress is committed to providing clean cooking fuel across the country in an 
accelerated manner in order to address this .,2,630,2014,INC,No Pledge 12 | 133945,The party believes in the principle of unity in diversity .,2,147,2014,BJP,No Pledge 13 | 133946,We will increase the strength of the diplomatic and allied cadres to keep pace with our increasing global engagement and enhanced stature of India in the world .,2,642,2019,BJP,Pledge 14 | 133947,"provide tax incentives for investments in research and development, geared towards indigenization of technology and innovation .",2,616,2014,BJP,Pledge 15 | 133948,Most of the plans had been finalized before the Congress demitted office .,2,117,2004,INC,No Pledge 16 | 133949,The manifesto is an opportunity for a political party to present its agenda for the future embodying the loftiest hopes and noblest aspirations of the people .,2,963,1999,INC,No Pledge 17 | 133950,It has launched the National Rural Health Mission which has already made a positive impact by improving the quality and accessibility of primary health care in villages .,2,117,2009,INC,No Pledge 18 | 133951,"There shall be a special survey, which will be undertaken once every five years to estimate, on select development indicators, the gap in the development of these groups .",2,343,2014,INC,Pledge 19 | 133952,Launch a National Digital Highway Development Project to bring affordable broadband Internet connectivity to every village .,2,450,2009,BJP,Pledge 20 | 133953,Dispense with clubbing of agricultural income with other sources of income for determining tax liability on other income .,2,256,2009,BJP,No Pledge 21 | 133954,"PHCs will provide all primary health services, including preventive measures and wellness services, and become referral centres for serious medical cases .",2,863,2019,INC,Pledge 22 | 133955,The recently established National Security Council will advise the government in this regard and also in the establishment of a credible nuclear deterrence .,2,150,1999,BJP,No Pledge 23 | 133956,Congress will work with 
State Governments to ensure that a child receives a good quality education and that this is reflected in learning outcomes .,2,948,2019,INC,Pledge 24 | 133957,"It is, as has become painfully evident, not confined to any community or any political persuasion .",2,69,2009,INC,No Pledge 25 | 133958,"9 . We will set up a “National Panchayati Raj Commission”, with branches in each State and Union Territory, so that :",2,481,2014,INC,Pledge 26 | 133959,The BJP will set up an experts committee to deal with the following issues :,2,354,2009,BJP,Pledge 27 | 133960,We aspire to be the world's third largest economy by 2030 .,2,18,2019,BJP,Pledge 28 | 133961,Deployment of broadband in every village would be a thrust area .,2,197,2014,BJP,Pledge 29 | 133962,The Credit Guarantee Scheme of the Government of India is an important component wherein loans to MSME are guaranteed .,2,210,2019,BJP,No Pledge 30 | 133963,We will offer higher interest rates on fixed deposits by divyangs .,2,572,2019,BJP,Pledge 31 | 133964,"Working of regulatory bodies, which oversee medical education in the country, will be reviewed to improve standards .",2,700,2004,BJP,Pledge 32 | 133965,MGNREGA will also be harnessed to support the construction of poultry shelters and water bodies for fisheries .,2,503,2014,INC,Pledge 33 | 133966,We reach out to the minorities and even at the cost of repetition proclaim that we will safeguard the rights as enshrined in our Constitution .,2,36,1999,BJP,No Pledge 34 | 133967,3 . 
Announce a detailed Jobs Agenda to ensure that we create 10 crore new jobs and entrepreneurship opportunities for our youth .,2,241,2014,INC,Pledge 35 | 133968,"India will engage with the world in the global war on terror while not compromising on its domestic interests, primarily protecting citizens from the ravages of terrorism .",2,107,2009,BJP,No Pledge 36 | 133969,Saving Rama Setu is to save the vast thorium deposits which are the future source of our energy .,2,738,2009,BJP,No Pledge 37 | 133970,We propose to take strong measures to promote the manufacturing industry .,2,523,2014,INC,No Pledge 38 | 133971,The Congress will ensure the fullest implementation of minimum wage laws for farm labour .,2,216,2004,INC,Pledge 39 | 133972,The Indian National Congress will bring school curricula of various communal and sectarian organizations — regardless of their affiliation —under the regulatory purview of an empowered national body .,2,288,2009,INC,Pledge 40 | 133973,The present system of setting Minimum Support Prices by the Commission on Agricultural Costs and Prices will be reviewed to further benefit all kisans .,2,140,2004,BJP,No Pledge 41 | 133974,"A ‘Vishwakarma Initiative’ will be launched for craftspersons and rural artisans to preserve their traditional skills and knowledge, upgrade them, and adapt to new challenges .",2,404,2004,BJP,Pledge 42 | 133975,"We want an India which we all feel part of, in whose future we all have a stake .",2,44,1999,BJP,No Pledge 43 | 133976,"But at the same time, governance in these states has to improve vastly .",2,775,1999,INC,No Pledge 44 | 133977,"The NDA Government envisions a future that rests on a cooperative multipolar world order, with India as one of the poles .",2,890,2004,BJP,No Pledge 45 | 133978,"Continuing our work, we will legislate a bill to prohibit and eliminate practices such as Triple Talaq and Nikah Halala .",2,514,2019,BJP,Pledge 46 | 133979,"Flawed design, inefficient execution, insufficient capacity and 
poor maintenance of infrastructure have dragged India’s growth rate down .",2,146,2019,INC,No Pledge 47 | 133980,The sustained campaign led by the Congress President has resulted in the declaration of Gandhi Jayanti as International Day of Non - Violence by the United Nations .,2,140,2009,INC,No Pledge 48 | 133981,"Along the lines of NREGA, we will enact a National Food Security Act .",2,200,2009,INC,Pledge 49 | 133982,21 Ensure the LPG gas cylinder connection to all poor rural households .,2,675,2019,BJP,Pledge 50 | 133983,a . We will enact central legislation on the Scheduled Castes and Scheduled Tribes Sub Plans to ensure focused spending of funds on weaker sections .,2,164,2014,INC,No Pledge 51 | 133984,"Launch a massive programme to detect, detain and deport illegal immigrants .",2,88,2009,BJP,Pledge 52 | -------------------------------------------------------------------------------- /data/labeled/fornaciari_we_2021/annotations/classification/group3/lopatina.csv: -------------------------------------------------------------------------------- 1 | id,text,split_,text_id,metadata__year,metadata__party,label 2 | 133935,10 . 
Protecting Indians overseas from exploitation or threats will remain a paramount concern of the Indian National Congress .,2,773,2014,INC,No Pledge 3 | 133936,An ‘Extremely Backward Communities Development Bank’ will be set up for promoting skill enhancement through learn - and - earn schemes for their uplift .,2,534,2009,BJP,Pledge 4 | 133937,"Similarly, almost all remaining households have been provided with an electricity connection .",2,313,2019,BJP,No Pledge 5 | 133938,We will expand this initiative further to take the number of beneficiaries of Mudra loans up to 30 crore .,2,444,2019,BJP,Pledge 6 | 133939,"In this sacred endeavour, the Congress has joined hands with like - minded political parties in different states .",2,19,2004,INC,No Pledge 7 | 133940,"The Indian National Congress has always stood for its democratic, autonomous and professional functioning and this will be ensured by enacting appropriate laws, including making a Constitutional provision .",2,249,2009,INC,No Pledge 8 | 133941,We are committed to annulling Article 35A of the Constitution of India as the provision is discriminatory against non - permanent residents and women of Jammu and Kashmir .,2,101,2019,BJP,Pledge 9 | 133942,"A suitable law, enabling micro - credit operations and nurturing them to be scaled up, will be enacted .",2,518,2004,BJP,Pledge 10 | 133943,"Education at all stages — primary, secondary and university — will be free in all respects for boys and girls belonging to dalit and adivasi communities .",2,253,2009,INC,Pledge 11 | 133944,The Indian National Congress is committed to providing clean cooking fuel across the country in an accelerated manner in order to address this .,2,630,2014,INC,Pledge 12 | 133945,The party believes in the principle of unity in diversity .,2,147,2014,BJP,No Pledge 13 | 133946,We will increase the strength of the diplomatic and allied cadres to keep pace with our increasing global engagement and enhanced stature of India in the world 
.,2,642,2019,BJP,No Pledge 14 | 133947,"provide tax incentives for investments in research and development, geared towards indigenization of technology and innovation .",2,616,2014,BJP,Pledge 15 | 133948,Most of the plans had been finalized before the Congress demitted office .,2,117,2004,INC,No Pledge 16 | 133949,The manifesto is an opportunity for a political party to present its agenda for the future embodying the loftiest hopes and noblest aspirations of the people .,2,963,1999,INC,No Pledge 17 | 133950,It has launched the National Rural Health Mission which has already made a positive impact by improving the quality and accessibility of primary health care in villages .,2,117,2009,INC,No Pledge 18 | 133951,"There shall be a special survey, which will be undertaken once every five years to estimate, on select development indicators, the gap in the development of these groups .",2,343,2014,INC,No Pledge 19 | 133952,Launch a National Digital Highway Development Project to bring affordable broadband Internet connectivity to every village .,2,450,2009,BJP,Pledge 20 | 133953,Dispense with clubbing of agricultural income with other sources of income for determining tax liability on other income .,2,256,2009,BJP,No Pledge 21 | 133954,"PHCs will provide all primary health services, including preventive measures and wellness services, and become referral centres for serious medical cases .",2,863,2019,INC,Pledge 22 | 133955,The recently established National Security Council will advise the government in this regard and also in the establishment of a credible nuclear deterrence .,2,150,1999,BJP,Pledge 23 | 133956,Congress will work with State Governments to ensure that a child receives a good quality education and that this is reflected in learning outcomes .,2,948,2019,INC,No Pledge 24 | 133957,"It is, as has become painfully evident, not confined to any community or any political persuasion .",2,69,2009,INC,No Pledge 25 | 133958,"9 . 
We will set up a “National Panchayati Raj Commission”, with branches in each State and Union Territory, so that :",2,481,2014,INC,Pledge 26 | 133959,The BJP will set up an experts committee to deal with the following issues :,2,354,2009,BJP,No Pledge 27 | 133960,We aspire to be the world's third largest economy by 2030 .,2,18,2019,BJP,Pledge 28 | 133961,Deployment of broadband in every village would be a thrust area .,2,197,2014,BJP,No Pledge 29 | 133962,The Credit Guarantee Scheme of the Government of India is an important component wherein loans to MSME are guaranteed .,2,210,2019,BJP,No Pledge 30 | 133963,We will offer higher interest rates on fixed deposits by divyangs .,2,572,2019,BJP,Pledge 31 | 133964,"Working of regulatory bodies, which oversee medical education in the country, will be reviewed to improve standards .",2,700,2004,BJP,No Pledge 32 | 133965,MGNREGA will also be harnessed to support the construction of poultry shelters and water bodies for fisheries .,2,503,2014,INC,No Pledge 33 | 133966,We reach out to the minorities and even at the cost of repetition proclaim that we will safeguard the rights as enshrined in our Constitution .,2,36,1999,BJP,No Pledge 34 | 133967,3 . 
Announce a detailed Jobs Agenda to ensure that we create 10 crore new jobs and entrepreneurship opportunities for our youth .,2,241,2014,INC,Pledge 35 | 133968,"India will engage with the world in the global war on terror while not compromising on its domestic interests, primarily protecting citizens from the ravages of terrorism .",2,107,2009,BJP,No Pledge 36 | 133969,Saving Rama Setu is to save the vast thorium deposits which are the future source of our energy .,2,738,2009,BJP,No Pledge 37 | 133970,We propose to take strong measures to promote the manufacturing industry .,2,523,2014,INC,No Pledge 38 | 133971,The Congress will ensure the fullest implementation of minimum wage laws for farm labour .,2,216,2004,INC,Pledge 39 | 133972,The Indian National Congress will bring school curricula of various communal and sectarian organizations — regardless of their affiliation —under the regulatory purview of an empowered national body .,2,288,2009,INC,Pledge 40 | 133973,The present system of setting Minimum Support Prices by the Commission on Agricultural Costs and Prices will be reviewed to further benefit all kisans .,2,140,2004,BJP,No Pledge 41 | 133974,"A ‘Vishwakarma Initiative’ will be launched for craftspersons and rural artisans to preserve their traditional skills and knowledge, upgrade them, and adapt to new challenges .",2,404,2004,BJP,Pledge 42 | 133975,"We want an India which we all feel part of, in whose future we all have a stake .",2,44,1999,BJP,No Pledge 43 | 133976,"But at the same time, governance in these states has to improve vastly .",2,775,1999,INC,No Pledge 44 | 133977,"The NDA Government envisions a future that rests on a cooperative multipolar world order, with India as one of the poles .",2,890,2004,BJP,No Pledge 45 | 133978,"Continuing our work, we will legislate a bill to prohibit and eliminate practices such as Triple Talaq and Nikah Halala .",2,514,2019,BJP,Pledge 46 | 133979,"Flawed design, inefficient execution, insufficient capacity and 
poor maintenance of infrastructure have dragged India’s growth rate down .",2,146,2019,INC,No Pledge 47 | 133980,The sustained campaign led by the Congress President has resulted in the declaration of Gandhi Jayanti as International Day of Non - Violence by the United Nations .,2,140,2009,INC,No Pledge 48 | 133981,"Along the lines of NREGA, we will enact a National Food Security Act .",2,200,2009,INC,Pledge 49 | 133982,21 Ensure the LPG gas cylinder connection to all poor rural households .,2,675,2019,BJP,Pledge 50 | 133983,a . We will enact central legislation on the Scheduled Castes and Scheduled Tribes Sub Plans to ensure focused spending of funds on weaker sections .,2,164,2014,INC,Pledge 51 | 133984,"Launch a massive programme to detect, detain and deport illegal immigrants .",2,88,2009,BJP,Pledge 52 | -------------------------------------------------------------------------------- /data/labeled/fornaciari_we_2021/annotations/classification/group3/luisa.kutlar.csv: -------------------------------------------------------------------------------- 1 | id,text,split_,text_id,metadata__year,metadata__party,label 2 | 133935,10 . 
Protecting Indians overseas from exploitation or threats will remain a paramount concern of the Indian National Congress .,2,773,2014,INC,No Pledge 3 | 133936,An ‘Extremely Backward Communities Development Bank’ will be set up for promoting skill enhancement through learn - and - earn schemes for their uplift .,2,534,2009,BJP,Pledge 4 | 133937,"Similarly, almost all remaining households have been provided with an electricity connection .",2,313,2019,BJP,No Pledge 5 | 133938,We will expand this initiative further to take the number of beneficiaries of Mudra loans up to 30 crore .,2,444,2019,BJP,Pledge 6 | 133939,"In this sacred endeavour, the Congress has joined hands with like - minded political parties in different states .",2,19,2004,INC,No Pledge 7 | 133940,"The Indian National Congress has always stood for its democratic, autonomous and professional functioning and this will be ensured by enacting appropriate laws, including making a Constitutional provision .",2,249,2009,INC,Pledge 8 | 133941,We are committed to annulling Article 35A of the Constitution of India as the provision is discriminatory against non - permanent residents and women of Jammu and Kashmir .,2,101,2019,BJP,Pledge 9 | 133942,"A suitable law, enabling micro - credit operations and nurturing them to be scaled up, will be enacted .",2,518,2004,BJP,Pledge 10 | 133943,"Education at all stages — primary, secondary and university — will be free in all respects for boys and girls belonging to dalit and adivasi communities .",2,253,2009,INC,Pledge 11 | 133944,The Indian National Congress is committed to providing clean cooking fuel across the country in an accelerated manner in order to address this .,2,630,2014,INC,Pledge 12 | 133945,The party believes in the principle of unity in diversity .,2,147,2014,BJP,No Pledge 13 | 133946,We will increase the strength of the diplomatic and allied cadres to keep pace with our increasing global engagement and enhanced stature of India in the world 
.,2,642,2019,BJP,No Pledge 14 | 133947,"provide tax incentives for investments in research and development, geared towards indigenization of technology and innovation .",2,616,2014,BJP,Pledge 15 | 133948,Most of the plans had been finalized before the Congress demitted office .,2,117,2004,INC,No Pledge 16 | 133949,The manifesto is an opportunity for a political party to present its agenda for the future embodying the loftiest hopes and noblest aspirations of the people .,2,963,1999,INC,No Pledge 17 | 133950,It has launched the National Rural Health Mission which has already made a positive impact by improving the quality and accessibility of primary health care in villages .,2,117,2009,INC,No Pledge 18 | 133951,"There shall be a special survey, which will be undertaken once every five years to estimate, on select development indicators, the gap in the development of these groups .",2,343,2014,INC,Pledge 19 | 133952,Launch a National Digital Highway Development Project to bring affordable broadband Internet connectivity to every village .,2,450,2009,BJP,Pledge 20 | 133953,Dispense with clubbing of agricultural income with other sources of income for determining tax liability on other income .,2,256,2009,BJP,No Pledge 21 | 133954,"PHCs will provide all primary health services, including preventive measures and wellness services, and become referral centres for serious medical cases .",2,863,2019,INC,Pledge 22 | 133955,The recently established National Security Council will advise the government in this regard and also in the establishment of a credible nuclear deterrence .,2,150,1999,BJP,No Pledge 23 | 133956,Congress will work with State Governments to ensure that a child receives a good quality education and that this is reflected in learning outcomes .,2,948,2019,INC,No Pledge 24 | 133957,"It is, as has become painfully evident, not confined to any community or any political persuasion .",2,69,2009,INC,No Pledge 25 | 133958,"9 . 
We will set up a “National Panchayati Raj Commission”, with branches in each State and Union Territory, so that :",2,481,2014,INC,Pledge 26 | 133959,The BJP will set up an experts committee to deal with the following issues :,2,354,2009,BJP,Pledge 27 | 133960,We aspire to be the world's third largest economy by 2030 .,2,18,2019,BJP,Pledge 28 | 133961,Deployment of broadband in every village would be a thrust area .,2,197,2014,BJP,No Pledge 29 | 133962,The Credit Guarantee Scheme of the Government of India is an important component wherein loans to MSME are guaranteed .,2,210,2019,BJP,No Pledge 30 | 133963,We will offer higher interest rates on fixed deposits by divyangs .,2,572,2019,BJP,Pledge 31 | 133964,"Working of regulatory bodies, which oversee medical education in the country, will be reviewed to improve standards .",2,700,2004,BJP,No Pledge 32 | 133965,MGNREGA will also be harnessed to support the construction of poultry shelters and water bodies for fisheries .,2,503,2014,INC,No Pledge 33 | 133966,We reach out to the minorities and even at the cost of repetition proclaim that we will safeguard the rights as enshrined in our Constitution .,2,36,1999,BJP,Pledge 34 | 133967,3 . 
Announce a detailed Jobs Agenda to ensure that we create 10 crore new jobs and entrepreneurship opportunities for our youth .,2,241,2014,INC,No Pledge 35 | 133968,"India will engage with the world in the global war on terror while not compromising on its domestic interests, primarily protecting citizens from the ravages of terrorism .",2,107,2009,BJP,No Pledge 36 | 133969,Saving Rama Setu is to save the vast thorium deposits which are the future source of our energy .,2,738,2009,BJP,No Pledge 37 | 133970,We propose to take strong measures to promote the manufacturing industry .,2,523,2014,INC,No Pledge 38 | 133971,The Congress will ensure the fullest implementation of minimum wage laws for farm labour .,2,216,2004,INC,Pledge 39 | 133972,The Indian National Congress will bring school curricula of various communal and sectarian organizations — regardless of their affiliation —under the regulatory purview of an empowered national body .,2,288,2009,INC,Pledge 40 | 133973,The present system of setting Minimum Support Prices by the Commission on Agricultural Costs and Prices will be reviewed to further benefit all kisans .,2,140,2004,BJP,No Pledge 41 | 133974,"A ‘Vishwakarma Initiative’ will be launched for craftspersons and rural artisans to preserve their traditional skills and knowledge, upgrade them, and adapt to new challenges .",2,404,2004,BJP,Pledge 42 | 133975,"We want an India which we all feel part of, in whose future we all have a stake .",2,44,1999,BJP,No Pledge 43 | 133976,"But at the same time, governance in these states has to improve vastly .",2,775,1999,INC,No Pledge 44 | 133977,"The NDA Government envisions a future that rests on a cooperative multipolar world order, with India as one of the poles .",2,890,2004,BJP,No Pledge 45 | 133978,"Continuing our work, we will legislate a bill to prohibit and eliminate practices such as Triple Talaq and Nikah Halala .",2,514,2019,BJP,Pledge 46 | 133979,"Flawed design, inefficient execution, insufficient capacity 
and poor maintenance of infrastructure have dragged India’s growth rate down .",2,146,2019,INC,No Pledge 47 | 133980,The sustained campaign led by the Congress President has resulted in the declaration of Gandhi Jayanti as International Day of Non - Violence by the United Nations .,2,140,2009,INC,No Pledge 48 | 133981,"Along the lines of NREGA, we will enact a National Food Security Act .",2,200,2009,INC,Pledge 49 | 133982,21 Ensure the LPG gas cylinder connection to all poor rural households .,2,675,2019,BJP,Pledge 50 | 133983,a . We will enact central legislation on the Scheduled Castes and Scheduled Tribes Sub Plans to ensure focused spending of funds on weaker sections .,2,164,2014,INC,Pledge 51 | 133984,"Launch a massive programme to detect, detain and deport illegal immigrants .",2,88,2009,BJP,Pledge 52 | -------------------------------------------------------------------------------- /data/labeled/fornaciari_we_2021/annotations/classification/llms/DeepSeek-V3-0324.csv: -------------------------------------------------------------------------------- 1 | text,label,text_id,metadata__party,metadata__year,split_ 2 | "Faced by this aggression in Kargil, the response of the Government was swift, though measured .",No Pledge,9,BJP,1999,0 3 | This is the moment to consolidate all forces subscribing to the fundamental values of our Constitution .,No Pledge,17,INC,2004,0 4 | "The Rs . 
1,000 - crore Sampoorna Grameen Rozgar Yojana, started by our Government, is the biggest food for - work program since Independence .",No Pledge,42,BJP,2004,0 5 | "4 lakh crores in lost output, lakhs of jobs and greater indebtedness .",No Pledge,58,INC,2019,0 6 | "They are, in fact, responsible for the electoral growth of the BJP .",No Pledge,62,INC,2009,0 7 | Involve the state Governments in the promotion of foreign trade and commerce .,No Pledge,92,BJP,2014,0 8 | We will extensively use technology to ensure a better knowledge of the market prices of various agro - products for the benefit of farmers .,No Pledge,140,BJP,2019,0 9 | We will also introduce a multi - purpose identity card for all citizens .,No Pledge,144,BJP,1999,0 10 | "It will enable industry to access agriculture produce directly from farmers, and suggest ways to step up exports, including to organized retail outlets abroad .",No Pledge,148,BJP,2004,0 11 | This shall be responsible for setting and enforcing standards for all food products .,No Pledge,157,BJP,2004,0 12 | a . We will ensure the passage of the Women’s Reservation Bill .,Pledge,160,INC,2014,0 13 | 11 . 
We will strengthen the legal and institutional framework to protect our children .,No Pledge,168,INC,2014,0 14 | More specialist battalions will be raised and positioned in key locations across the country .,No Pledge,181,INC,2009,0 15 | We will restrict foreign equity holding in private television broadcasting to 20% (and prevent cross holding to avoid emergence of monopolies in the media) .,Pledge,185,BJP,1999,0 16 | "Along with vastly expanded credit facilities for self - employment, the services industry will be given all support to fulfill its true employment potential .",No Pledge,189,INC,2004,0 17 | "India’s indigenous thorium technology programme will be expedited and given all financial assistance, correcting the grievous wrong done by the UPA Government .",No Pledge,197,BJP,2009,0 18 | "Immediately after forming the governments in Chhattisgarh, Madhya Pradesh and Rajasthan, as promised, the 3 Congress Governments waived the loans of farmers .",Pledge,207,INC,2019,0 19 | Small industry will be particularly encouraged in states and regions where the potential for large or heavy industry is limited .,No Pledge,212,INC,1999,0 20 | "The Total Sanitation Campaign, launched by the NDA Government in 1999, has been a remarkable success .",No Pledge,218,BJP,2004,0 21 | Targeting time spent for tax compliance at 1 hour per month .,Pledge,227,BJP,2019,0 22 | Encouraging the production of cereals and discouraging the conversion of fertile farm land for dubious industrial projects .,No Pledge,228,BJP,2009,0 23 | "The India of tomorrow will have 125 crore such dreams, and will be built on the same .",No Pledge,279,BJP,2014,0 24 | Antyodaya cards for all households at risk of hunger will be introduced .,Pledge,287,INC,2004,0 25 | The Indian National Congress has endeavoured to provide quality public health services to all citizens .,No Pledge,288,INC,2014,0 26 | The Ministry of Finance will be directed to provide all required resources to implement this program in a time 
- bound manner .,No Pledge,323,BJP,2004,0 27 | The emphasis in all foreign investment policies will be maximization of local value - addition and export potential .,No Pledge,331,INC,2009,0 28 | "We will introduce the goods and services tax from April 1, 2010 .",Pledge,335,INC,2009,0 29 | "Every consumer of electricity in India, including farmers, would be connected through digital, tamper - proof meters in the next three years .",Pledge,336,BJP,2004,0 30 | "While the bulk of our population still lives and works in villages, India is rapidly urbanizing .",No Pledge,343,INC,2009,0 31 | A raw material use policy will be unveiled in the mines sector .,No Pledge,370,BJP,2004,0 32 | A detailed roadmap for accomplishing this will be unveiled within 30 days of coming to power so that a national consensus is also created .,No Pledge,398,INC,2004,0 33 | Congress promises to work with industry to increase the expenditure on science and technology to 2 per cent of GDP .,Pledge,406,INC,2019,0 34 | "New middle - level technical institutes in clusters where, for example, weavers and artisans are concentrated, will be started .",No Pledge,423,INC,1999,0 35 | These institutions would provide four - year integrated courses that would set the standards for quality teachers in our schools .,No Pledge,469,BJP,2019,0 36 | We will also motivate States to replicate this model as it is our firm belief that quality teachers lay the foundation of quality learning .,No Pledge,470,BJP,2019,0 37 | "Highest priority would be given to address the acute shortage of teachers and researchers, quality of education and research, and also the employability factor associated with most of the courses .",No Pledge,490,BJP,2014,0 38 | "A national programme will be launched, in cooperation with State Governments, to provide bicycles to girls from Below Poverty Line Families who attend school .",No Pledge,502,BJP,2009,0 39 | "Art, culture and heritage constitute the identity of a people .",No 
Pledge,515,INC,2019,0 40 | We will recognise the 11 left out Indian Gorkha sub - tribes as Schedule Tribes .,Pledge,574,BJP,2019,0 41 | "Make potable drinking water available to all thus reducing water - borne diseases, which will automatically translate into Diarrhoea - free India .",No Pledge,587,BJP,2014,0 42 | Reservations for the poor among ‘Forward Classes’ will be introduced after receiving recommendations of the Commission set up for this purpose .,Pledge,604,BJP,2004,0 43 | This will promote competition and enhance efficiency in procurement processes .,No Pledge,681,INC,2014,0 44 | Article 370 poses a psychological barrier for the full integration of the people of Jammu & Kashmir with the national mainstream .,No Pledge,682,BJP,2009,0 45 | "In consonance with its policy, the BJP supports the creation of Telangana as a separate State of the Union of India .",No Pledge,687,BJP,2009,0 46 | The Congress will identify those environmental management functions that could be delegated to the states and local bodies .,No Pledge,718,INC,1999,0 47 | The number of courts and the number of judges will be doubled in five years for quicker judicial process .,Pledge,773,BJP,2004,0 48 | "In the last 5 years under the BJP Government, hate crimes and atrocities against the minorities and other vulnerable sections of the people have increased manifold .",No Pledge,788,INC,2019,0 49 | The Planning Commission will be reformed and reorganized in light of the changing developmental needs of our country .,No Pledge,799,BJP,2004,0 50 | Joint projects in the energy sector will be actively explored .,No Pledge,938,INC,1999,0 51 | "Our sports policy will aim to inculcate in our citizens, especially students and youth, the culture of sports and fitness and will encourage sports as a career .",No Pledge,1030,INC,2019,0 52 | -------------------------------------------------------------------------------- 
/data/labeled/fornaciari_we_2021/annotations/classification/llms/Llama-4-Maverick-17B-128E-Instruct.csv: -------------------------------------------------------------------------------- 1 | text,label,text_id,metadata__party,metadata__year,split_ 2 | "Faced by this aggression in Kargil, the response of the Government was swift, though measured .",No Pledge,9,BJP,1999,0 3 | This is the moment to consolidate all forces subscribing to the fundamental values of our Constitution .,No Pledge,17,INC,2004,0 4 | "The Rs . 1,000 - crore Sampoorna Grameen Rozgar Yojana, started by our Government, is the biggest food for - work program since Independence .",No Pledge,42,BJP,2004,0 5 | "4 lakh crores in lost output, lakhs of jobs and greater indebtedness .",No Pledge,58,INC,2019,0 6 | "They are, in fact, responsible for the electoral growth of the BJP .",No Pledge,62,INC,2009,0 7 | Involve the state Governments in the promotion of foreign trade and commerce .,No Pledge,92,BJP,2014,0 8 | We will extensively use technology to ensure a better knowledge of the market prices of various agro - products for the benefit of farmers .,No Pledge,140,BJP,2019,0 9 | We will also introduce a multi - purpose identity card for all citizens .,Pledge,144,BJP,1999,0 10 | "It will enable industry to access agriculture produce directly from farmers, and suggest ways to step up exports, including to organized retail outlets abroad .",No Pledge,148,BJP,2004,0 11 | This shall be responsible for setting and enforcing standards for all food products .,No Pledge,157,BJP,2004,0 12 | a . We will ensure the passage of the Women’s Reservation Bill .,Pledge,160,INC,2014,0 13 | 11 . 
We will strengthen the legal and institutional framework to protect our children .,No Pledge,168,INC,2014,0 14 | More specialist battalions will be raised and positioned in key locations across the country .,Pledge,181,INC,2009,0 15 | We will restrict foreign equity holding in private television broadcasting to 20% (and prevent cross holding to avoid emergence of monopolies in the media) .,Pledge,185,BJP,1999,0 16 | "Along with vastly expanded credit facilities for self - employment, the services industry will be given all support to fulfill its true employment potential .",No Pledge,189,INC,2004,0 17 | "India’s indigenous thorium technology programme will be expedited and given all financial assistance, correcting the grievous wrong done by the UPA Government .",Pledge,197,BJP,2009,0 18 | "Immediately after forming the governments in Chhattisgarh, Madhya Pradesh and Rajasthan, as promised, the 3 Congress Governments waived the loans of farmers .",Pledge,207,INC,2019,0 19 | Small industry will be particularly encouraged in states and regions where the potential for large or heavy industry is limited .,No Pledge,212,INC,1999,0 20 | "The Total Sanitation Campaign, launched by the NDA Government in 1999, has been a remarkable success .",No Pledge,218,BJP,2004,0 21 | Targeting time spent for tax compliance at 1 hour per month .,Pledge,227,BJP,2019,0 22 | Encouraging the production of cereals and discouraging the conversion of fertile farm land for dubious industrial projects .,No Pledge,228,BJP,2009,0 23 | "The India of tomorrow will have 125 crore such dreams, and will be built on the same .",No Pledge,279,BJP,2014,0 24 | Antyodaya cards for all households at risk of hunger will be introduced .,Pledge,287,INC,2004,0 25 | The Indian National Congress has endeavoured to provide quality public health services to all citizens .,No Pledge,288,INC,2014,0 26 | The Ministry of Finance will be directed to provide all required resources to implement this program in a time - 
bound manner .,No Pledge,323,BJP,2004,0 27 | The emphasis in all foreign investment policies will be maximization of local value - addition and export potential .,No Pledge,331,INC,2009,0 28 | "We will introduce the goods and services tax from April 1, 2010 .",Pledge,335,INC,2009,0 29 | "Every consumer of electricity in India, including farmers, would be connected through digital, tamper - proof meters in the next three years .",Pledge,336,BJP,2004,0 30 | "While the bulk of our population still lives and works in villages, India is rapidly urbanizing .",No Pledge,343,INC,2009,0 31 | A raw material use policy will be unveiled in the mines sector .,No Pledge,370,BJP,2004,0 32 | A detailed roadmap for accomplishing this will be unveiled within 30 days of coming to power so that a national consensus is also created .,No Pledge,398,INC,2004,0 33 | Congress promises to work with industry to increase the expenditure on science and technology to 2 per cent of GDP .,Pledge,406,INC,2019,0 34 | "New middle - level technical institutes in clusters where, for example, weavers and artisans are concentrated, will be started .",Pledge,423,INC,1999,0 35 | These institutions would provide four - year integrated courses that would set the standards for quality teachers in our schools .,No Pledge,469,BJP,2019,0 36 | We will also motivate States to replicate this model as it is our firm belief that quality teachers lay the foundation of quality learning .,No Pledge,470,BJP,2019,0 37 | "Highest priority would be given to address the acute shortage of teachers and researchers, quality of education and research, and also the employability factor associated with most of the courses .",No Pledge,490,BJP,2014,0 38 | "A national programme will be launched, in cooperation with State Governments, to provide bicycles to girls from Below Poverty Line Families who attend school .",Pledge,502,BJP,2009,0 39 | "Art, culture and heritage constitute the identity of a people .",No Pledge,515,INC,2019,0 
40 | We will recognise the 11 left out Indian Gorkha sub - tribes as Schedule Tribes .,Pledge,574,BJP,2019,0 41 | "Make potable drinking water available to all thus reducing water - borne diseases, which will automatically translate into Diarrhoea - free India .",Pledge,587,BJP,2014,0 42 | Reservations for the poor among ‘Forward Classes’ will be introduced after receiving recommendations of the Commission set up for this purpose .,No Pledge,604,BJP,2004,0 43 | This will promote competition and enhance efficiency in procurement processes .,No Pledge,681,INC,2014,0 44 | Article 370 poses a psychological barrier for the full integration of the people of Jammu & Kashmir with the national mainstream .,No Pledge,682,BJP,2009,0 45 | "In consonance with its policy, the BJP supports the creation of Telangana as a separate State of the Union of India .",No Pledge,687,BJP,2009,0 46 | The Congress will identify those environmental management functions that could be delegated to the states and local bodies .,No Pledge,718,INC,1999,0 47 | The number of courts and the number of judges will be doubled in five years for quicker judicial process .,Pledge,773,BJP,2004,0 48 | "In the last 5 years under the BJP Government, hate crimes and atrocities against the minorities and other vulnerable sections of the people have increased manifold .",No Pledge,788,INC,2019,0 49 | The Planning Commission will be reformed and reorganized in light of the changing developmental needs of our country .,No Pledge,799,BJP,2004,0 50 | Joint projects in the energy sector will be actively explored .,No Pledge,938,INC,1999,0 51 | "Our sports policy will aim to inculcate in our citizens, especially students and youth, the culture of sports and fitness and will encourage sports as a career .",No Pledge,1030,INC,2019,0 52 | -------------------------------------------------------------------------------- /data/labeled/fornaciari_we_2021/annotations/classification/llms/Qwen3-235B-A22B-Instruct-2507.csv: 
-------------------------------------------------------------------------------- 1 | text,label,text_id,metadata__party,metadata__year,split_ 2 | "Faced by this aggression in Kargil, the response of the Government was swift, though measured .",No Pledge,9,BJP,1999,0 3 | This is the moment to consolidate all forces subscribing to the fundamental values of our Constitution .,No Pledge,17,INC,2004,0 4 | "The Rs . 1,000 - crore Sampoorna Grameen Rozgar Yojana, started by our Government, is the biggest food for - work program since Independence .",No Pledge,42,BJP,2004,0 5 | "4 lakh crores in lost output, lakhs of jobs and greater indebtedness .",No Pledge,58,INC,2019,0 6 | "They are, in fact, responsible for the electoral growth of the BJP .",No Pledge,62,INC,2009,0 7 | Involve the state Governments in the promotion of foreign trade and commerce .,No Pledge,92,BJP,2014,0 8 | We will extensively use technology to ensure a better knowledge of the market prices of various agro - products for the benefit of farmers .,No Pledge,140,BJP,2019,0 9 | We will also introduce a multi - purpose identity card for all citizens .,Pledge,144,BJP,1999,0 10 | "It will enable industry to access agriculture produce directly from farmers, and suggest ways to step up exports, including to organized retail outlets abroad .",No Pledge,148,BJP,2004,0 11 | This shall be responsible for setting and enforcing standards for all food products .,No Pledge,157,BJP,2004,0 12 | a . We will ensure the passage of the Women’s Reservation Bill .,Pledge,160,INC,2014,0 13 | 11 . 
We will strengthen the legal and institutional framework to protect our children .,No Pledge,168,INC,2014,0 14 | More specialist battalions will be raised and positioned in key locations across the country .,No Pledge,181,INC,2009,0 15 | We will restrict foreign equity holding in private television broadcasting to 20% (and prevent cross holding to avoid emergence of monopolies in the media) .,Pledge,185,BJP,1999,0 16 | "Along with vastly expanded credit facilities for self - employment, the services industry will be given all support to fulfill its true employment potential .",No Pledge,189,INC,2004,0 17 | "India’s indigenous thorium technology programme will be expedited and given all financial assistance, correcting the grievous wrong done by the UPA Government .",Pledge,197,BJP,2009,0 18 | "Immediately after forming the governments in Chhattisgarh, Madhya Pradesh and Rajasthan, as promised, the 3 Congress Governments waived the loans of farmers .",No Pledge,207,INC,2019,0 19 | Small industry will be particularly encouraged in states and regions where the potential for large or heavy industry is limited .,No Pledge,212,INC,1999,0 20 | "The Total Sanitation Campaign, launched by the NDA Government in 1999, has been a remarkable success .",No Pledge,218,BJP,2004,0 21 | Targeting time spent for tax compliance at 1 hour per month .,Pledge,227,BJP,2019,0 22 | Encouraging the production of cereals and discouraging the conversion of fertile farm land for dubious industrial projects .,No Pledge,228,BJP,2009,0 23 | "The India of tomorrow will have 125 crore such dreams, and will be built on the same .",No Pledge,279,BJP,2014,0 24 | Antyodaya cards for all households at risk of hunger will be introduced .,Pledge,287,INC,2004,0 25 | The Indian National Congress has endeavoured to provide quality public health services to all citizens .,No Pledge,288,INC,2014,0 26 | The Ministry of Finance will be directed to provide all required resources to implement this program in a time 
- bound manner .,No Pledge,323,BJP,2004,0 27 | The emphasis in all foreign investment policies will be maximization of local value - addition and export potential .,No Pledge,331,INC,2009,0 28 | "We will introduce the goods and services tax from April 1, 2010 .",Pledge,335,INC,2009,0 29 | "Every consumer of electricity in India, including farmers, would be connected through digital, tamper - proof meters in the next three years .",Pledge,336,BJP,2004,0 30 | "While the bulk of our population still lives and works in villages, India is rapidly urbanizing .",No Pledge,343,INC,2009,0 31 | A raw material use policy will be unveiled in the mines sector .,No Pledge,370,BJP,2004,0 32 | A detailed roadmap for accomplishing this will be unveiled within 30 days of coming to power so that a national consensus is also created .,No Pledge,398,INC,2004,0 33 | Congress promises to work with industry to increase the expenditure on science and technology to 2 per cent of GDP .,Pledge,406,INC,2019,0 34 | "New middle - level technical institutes in clusters where, for example, weavers and artisans are concentrated, will be started .",No Pledge,423,INC,1999,0 35 | These institutions would provide four - year integrated courses that would set the standards for quality teachers in our schools .,No Pledge,469,BJP,2019,0 36 | We will also motivate States to replicate this model as it is our firm belief that quality teachers lay the foundation of quality learning .,No Pledge,470,BJP,2019,0 37 | "Highest priority would be given to address the acute shortage of teachers and researchers, quality of education and research, and also the employability factor associated with most of the courses .",No Pledge,490,BJP,2014,0 38 | "A national programme will be launched, in cooperation with State Governments, to provide bicycles to girls from Below Poverty Line Families who attend school .",Pledge,502,BJP,2009,0 39 | "Art, culture and heritage constitute the identity of a people .",No 
Pledge,515,INC,2019,0 40 | We will recognise the 11 left out Indian Gorkha sub - tribes as Schedule Tribes .,Pledge,574,BJP,2019,0 41 | "Make potable drinking water available to all thus reducing water - borne diseases, which will automatically translate into Diarrhoea - free India .",No Pledge,587,BJP,2014,0 42 | Reservations for the poor among ‘Forward Classes’ will be introduced after receiving recommendations of the Commission set up for this purpose .,Pledge,604,BJP,2004,0 43 | This will promote competition and enhance efficiency in procurement processes .,No Pledge,681,INC,2014,0 44 | Article 370 poses a psychological barrier for the full integration of the people of Jammu & Kashmir with the national mainstream .,No Pledge,682,BJP,2009,0 45 | "In consonance with its policy, the BJP supports the creation of Telangana as a separate State of the Union of India .",No Pledge,687,BJP,2009,0 46 | The Congress will identify those environmental management functions that could be delegated to the states and local bodies .,Pledge,718,INC,1999,0 47 | The number of courts and the number of judges will be doubled in five years for quicker judicial process .,Pledge,773,BJP,2004,0 48 | "In the last 5 years under the BJP Government, hate crimes and atrocities against the minorities and other vulnerable sections of the people have increased manifold .",No Pledge,788,INC,2019,0 49 | The Planning Commission will be reformed and reorganized in light of the changing developmental needs of our country .,No Pledge,799,BJP,2004,0 50 | Joint projects in the energy sector will be actively explored .,No Pledge,938,INC,1999,0 51 | "Our sports policy will aim to inculcate in our citizens, especially students and youth, the culture of sports and fitness and will encourage sports as a career .",No Pledge,1030,INC,2019,0 52 | -------------------------------------------------------------------------------- /data/labeled/fornaciari_we_2021/annotations/classification/llms/gpt-oss-120b.csv: 
-------------------------------------------------------------------------------- 1 | text,label,text_id,metadata__party,metadata__year,split_ 2 | "Faced by this aggression in Kargil, the response of the Government was swift, though measured .",No Pledge,9,BJP,1999,0 3 | This is the moment to consolidate all forces subscribing to the fundamental values of our Constitution .,No Pledge,17,INC,2004,0 4 | "The Rs . 1,000 - crore Sampoorna Grameen Rozgar Yojana, started by our Government, is the biggest food for - work program since Independence .",No Pledge,42,BJP,2004,0 5 | "4 lakh crores in lost output, lakhs of jobs and greater indebtedness .",No Pledge,58,INC,2019,0 6 | "They are, in fact, responsible for the electoral growth of the BJP .",No Pledge,62,INC,2009,0 7 | Involve the state Governments in the promotion of foreign trade and commerce .,No Pledge,92,BJP,2014,0 8 | We will extensively use technology to ensure a better knowledge of the market prices of various agro - products for the benefit of farmers .,No Pledge,140,BJP,2019,0 9 | We will also introduce a multi - purpose identity card for all citizens .,Pledge,144,BJP,1999,0 10 | "It will enable industry to access agriculture produce directly from farmers, and suggest ways to step up exports, including to organized retail outlets abroad .",No Pledge,148,BJP,2004,0 11 | This shall be responsible for setting and enforcing standards for all food products .,No Pledge,157,BJP,2004,0 12 | a . We will ensure the passage of the Women’s Reservation Bill .,Pledge,160,INC,2014,0 13 | 11 . 
We will strengthen the legal and institutional framework to protect our children .,No Pledge,168,INC,2014,0 14 | More specialist battalions will be raised and positioned in key locations across the country .,No Pledge,181,INC,2009,0 15 | We will restrict foreign equity holding in private television broadcasting to 20% (and prevent cross holding to avoid emergence of monopolies in the media) .,Pledge,185,BJP,1999,0 16 | "Along with vastly expanded credit facilities for self - employment, the services industry will be given all support to fulfill its true employment potential .",No Pledge,189,INC,2004,0 17 | "India’s indigenous thorium technology programme will be expedited and given all financial assistance, correcting the grievous wrong done by the UPA Government .",No Pledge,197,BJP,2009,0 18 | "Immediately after forming the governments in Chhattisgarh, Madhya Pradesh and Rajasthan, as promised, the 3 Congress Governments waived the loans of farmers .",No Pledge,207,INC,2019,0 19 | Small industry will be particularly encouraged in states and regions where the potential for large or heavy industry is limited .,No Pledge,212,INC,1999,0 20 | "The Total Sanitation Campaign, launched by the NDA Government in 1999, has been a remarkable success .",No Pledge,218,BJP,2004,0 21 | Targeting time spent for tax compliance at 1 hour per month .,Pledge,227,BJP,2019,0 22 | Encouraging the production of cereals and discouraging the conversion of fertile farm land for dubious industrial projects .,No Pledge,228,BJP,2009,0 23 | "The India of tomorrow will have 125 crore such dreams, and will be built on the same .",No Pledge,279,BJP,2014,0 24 | Antyodaya cards for all households at risk of hunger will be introduced .,Pledge,287,INC,2004,0 25 | The Indian National Congress has endeavoured to provide quality public health services to all citizens .,No Pledge,288,INC,2014,0 26 | The Ministry of Finance will be directed to provide all required resources to implement this program in a 
time - bound manner .,No Pledge,323,BJP,2004,0 27 | The emphasis in all foreign investment policies will be maximization of local value - addition and export potential .,No Pledge,331,INC,2009,0 28 | "We will introduce the goods and services tax from April 1, 2010 .",Pledge,335,INC,2009,0 29 | "Every consumer of electricity in India, including farmers, would be connected through digital, tamper - proof meters in the next three years .",Pledge,336,BJP,2004,0 30 | "While the bulk of our population still lives and works in villages, India is rapidly urbanizing .",No Pledge,343,INC,2009,0 31 | A raw material use policy will be unveiled in the mines sector .,Pledge,370,BJP,2004,0 32 | A detailed roadmap for accomplishing this will be unveiled within 30 days of coming to power so that a national consensus is also created .,No Pledge,398,INC,2004,0 33 | Congress promises to work with industry to increase the expenditure on science and technology to 2 per cent of GDP .,Pledge,406,INC,2019,0 34 | "New middle - level technical institutes in clusters where, for example, weavers and artisans are concentrated, will be started .",Pledge,423,INC,1999,0 35 | These institutions would provide four - year integrated courses that would set the standards for quality teachers in our schools .,No Pledge,469,BJP,2019,0 36 | We will also motivate States to replicate this model as it is our firm belief that quality teachers lay the foundation of quality learning .,No Pledge,470,BJP,2019,0 37 | "Highest priority would be given to address the acute shortage of teachers and researchers, quality of education and research, and also the employability factor associated with most of the courses .",No Pledge,490,BJP,2014,0 38 | "A national programme will be launched, in cooperation with State Governments, to provide bicycles to girls from Below Poverty Line Families who attend school .",Pledge,502,BJP,2009,0 39 | "Art, culture and heritage constitute the identity of a people .",No 
Pledge,515,INC,2019,0 40 | We will recognise the 11 left out Indian Gorkha sub - tribes as Schedule Tribes .,Pledge,574,BJP,2019,0 41 | "Make potable drinking water available to all thus reducing water - borne diseases, which will automatically translate into Diarrhoea - free India .",No Pledge,587,BJP,2014,0 42 | Reservations for the poor among ‘Forward Classes’ will be introduced after receiving recommendations of the Commission set up for this purpose .,Pledge,604,BJP,2004,0 43 | This will promote competition and enhance efficiency in procurement processes .,No Pledge,681,INC,2014,0 44 | Article 370 poses a psychological barrier for the full integration of the people of Jammu & Kashmir with the national mainstream .,No Pledge,682,BJP,2009,0 45 | "In consonance with its policy, the BJP supports the creation of Telangana as a separate State of the Union of India .",No Pledge,687,BJP,2009,0 46 | The Congress will identify those environmental management functions that could be delegated to the states and local bodies .,Pledge,718,INC,1999,0 47 | The number of courts and the number of judges will be doubled in five years for quicker judicial process .,No Pledge,773,BJP,2004,0 48 | "In the last 5 years under the BJP Government, hate crimes and atrocities against the minorities and other vulnerable sections of the people have increased manifold .",No Pledge,788,INC,2019,0 49 | The Planning Commission will be reformed and reorganized in light of the changing developmental needs of our country .,No Pledge,799,BJP,2004,0 50 | Joint projects in the energy sector will be actively explored .,No Pledge,938,INC,1999,0 51 | "Our sports policy will aim to inculcate in our citizens, especially students and youth, the culture of sports and fitness and will encourage sports as a career .",No Pledge,1030,INC,2019,0 52 | -------------------------------------------------------------------------------- /data/labeled/gilardi_chatgpt_2023/README.md: 
-------------------------------------------------------------------------------- 1 | # Gilardi et al. (2023): Evaluating ChatGPT for text classification 2 | 3 | author: Hauke Licht\ 4 | date: 2024-02-25 5 | 6 | ## Description 7 | 8 | In their 2023 PNAS paper "ChatGPT outperforms crowd workers for text-annotation tasks", 9 | Fabrizio Gilardi, Meysam Alizadeh, and Maël Kubli 10 | evaluate the performance of ChatGPT in zero-shot text classification. 11 | 12 | ## The data 13 | 14 | Their replication data records a sample of 6,183 documents, including tweets and news articles from Alizadeh et al. ([2022](https://doi.org/10.51685/jqd.2022.023)) and tweets posted in 2023 (not in training data of first ChatGPT model). 15 | 16 | The replication materials are available at https://doi.org/10.7910/DVN/PQYF6M 17 | 18 | ## Annotation procedure 19 | 20 | Their study focuses on content related to the topic of content moderation. 21 | 22 | Their data contains four corpora 23 | 24 | 1. a corpus of tweets posted pre-2023 by common Twitter users relating to the topic of content moderation 25 | 2. a similar corpus of tweets posted by common Twitter users in 2023 (not covered by the ChatGPT model they evaluated) relating to the topic of content moderation 26 | 3. tweets posted by Members of the U.S. Congress relating to the topic of content moderation 27 | 4. 
news article headlines (plus first 200 words of article full text) relating to the topic of content moderation 28 | 29 | Which annotation task they applied depended on the corpus 30 | 31 | ### "Common user" tweets 32 | 33 | For "common user" tweets, they collect annotations along five coding dimensions: 34 | 35 | - *relevance*: determine whether a tweet is about content moderation or not 36 | - label classes: "relevant" (1), "irrelevant" (0) 37 | - *problem/solution frame*: classify whether a tweet frames content moderation as a solution, a problem, neither, or both 38 | - label classes: "problem", "solution", "neither", "both" 39 | - *policy frame*: classify the policy issue emphasized in the tweet when discussing content moderation 40 | - label classes: 15 categories, ranging from "economy" to "cultural identity" 41 | - *stance detection*: classify whether a tweet expresses a positive stance towards Section 230, a negative stance, or a neutral stance 42 | - label classes: "negative", "neutral", "positive" 43 | - *topic detection*: classify what other related topics the tweet discusses 44 | - label classes: "section 230", "trump ban", "complaints", "twitter support", "platform policies", and "other" 45 | 46 | ### Member of Congress tweets 47 | 48 | For Member of Congress tweets, they collect annotations along two coding dimensions: 49 | 50 | - *relevance*: is the tweet discussing a political issue 51 | - label classes: "relevant" (1), "irrelevant" (0) 52 | - *policy frame*: classify the policy issue emphasized in the tweet when discussing content moderation 53 | - label classes: 15 categories, ranging from "economy" to "cultural identity" 54 | 55 | ### News articles 56 | 57 | For news articles, they collect annotations along two coding dimensions: 58 | 59 | - *relevance*: determine whether a tweet is about content moderation or not 60 | - label classes: "relevant" (1), "irrelevant" (0) 61 | - *problem/solution frame*: classify whether a tweet frames content 
moderation as a solution, a problem, neither, or both 62 | - label classes: "problem", "solution", "neither", "both" 63 | 64 | ## The data 65 | 66 | We have prepared the common user tweets data, focusing on the relevance, problem/solution frame, and stance classification tasks: 67 | 68 | 1. relevance classifications: "gilardi_chatgpt_2023-content_moderation_relevance.csv" 69 | - column 'label' indicates the classification: 1 "relevant", 0 "irrelevant" 70 | - column 'text' records the coded tweet's text 71 | 2. problem/solution frame classifications: "gilardi_chatgpt_2023-content_moderation_frame.csv" 72 | - column 'label' indicates the frame category: "problem", "solution", "neither", "both" 73 | - column 'text' records the coded tweet's text 74 | 3. stance classification: "gilardi_chatgpt_2023-section230_stance.csv" 75 | - column 'label' indicates the expressed stance on Section 230: "negative", "neutral", "positive" 76 | - column 'text' records the coded tweet's text 77 | 78 | ## Download data files 79 | 80 | | dataset_key | file | url | 81 | |:---------------------|:------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------| 82 | | gilardi_chatgpt_2023 | gilardi_chatgpt_2023-content_moderation_frame.csv | https://cta-text-datasets.s3.eu-central-1.amazonaws.com/labeled/gilardi_chatgpt_2023/gilardi_chatgpt_2023-content_moderation_frame.csv | 83 | | gilardi_chatgpt_2023 | gilardi_chatgpt_2023-section230_stance.csv | https://cta-text-datasets.s3.eu-central-1.amazonaws.com/labeled/gilardi_chatgpt_2023/gilardi_chatgpt_2023-section230_stance.csv | 84 | | gilardi_chatgpt_2023 | gilardi_chatgpt_2023-content_moderation_relevance.csv | https://cta-text-datasets.s3.eu-central-1.amazonaws.com/labeled/gilardi_chatgpt_2023/gilardi_chatgpt_2023-content_moderation_relevance.csv | 
-------------------------------------------------------------------------------- /data/labeled/petkevic_political_2022/README.md: -------------------------------------------------------------------------------- 1 | # Negativity in campaign tweets coded from Petkevic & Nai (2022) 2 | 3 | author: Hauke Licht & Naomi Yagai\ 4 | date: 2024-01-24 5 | 6 | ## Description 7 | 8 | In their 2022 *American Politics Research* paper "Political Attacks in 280 Characters or Less: A New Tool for the Automated Classification of Campaign Negativity on Social Media," Petkevic and Nai measure the negativity in candidate's campaign tweets in the 2018 U.S. Senate Midterms. 9 | They identify the presence/absence of four phenomena: 10 | 11 | - negative tone 12 | - policy attack 13 | - personal attack 14 | - incivility 15 | 16 | Their measurements are generated through a neural network classifier they trained on human-coded samples. 17 | 18 | ## The raw data 19 | 20 | The authors explain the data in their article section "Measuring Negative Campaigning in Tweets", "Data and Procedure" (pp. 281) as below. 21 | 22 | > The data (tweets) used in this study were collected via vicinitas.io, a website that allows for bulk downloading of tweets retroactively based on Twitter handles (usernames). 23 | > Prior to it, an online search for Twitter pages of all contemporaneous Senate election candidates was performed to determine which of the candidates used Twitter for their political campaigns and what their Twitter handles were. 24 | > The handles were then supplied to vicinitas.io to collect the tweets for the period of September 1, 2018 -- November 6, 2018 (the day of the election), for a total of N = 16,173 tweets. 25 | 26 | They additionally note that 27 | > Three candidates did not, to the best of our knowledge, post any tweets in that period (even though they do have a twitter handle)... 28 | > The analyses discussed in this article thus concern the 63 remaining candidates. 
29 | > The number of tweets per candidate collected varies considerably, from N = 24 ... to N = 1028 ... with an average of 256.7 tweets per candidate. 30 | 31 | The corpus is in English. 32 | 33 | ## Annotation procedure 34 | 35 | The annotation procedure is described in their article section "Measuring Negative Campaigning in Tweets", "Data and Procedure" (pp. 281). 36 | 37 | > First, a random sample of 200 tweets was coded by four coders independently to check inter-coder reliability. 38 | > Regarding suboptimal scores, the codebook was reworked by analysing tweets where disagreements occurred and coders were consulted to establish systematic differences in the interpretation of negativity dimensions. 39 | > After introducing the new instructions, each coder was provided 100 tweets to annotate on each dimension. 40 | > This revealed the imbalance between labels since the majority was coded absent. 41 | > Thus, each coder was provided multiple random samples to annotate until at least 200 were coded "present" for the respective dimension. 42 | 43 | Each tweet was coded by each coder along four coding dimensions: 44 | 45 | - *neg_tone:* 1 if present, 0 if absent in a given tweet 46 | - *pol_att:* 1 if present, 0 if absent in a given tweet 47 | - *pers_att:* 1 if present, 0 if absent in a given tweet 48 | - *incivil:* 1 if present, 0 if absent in a given tweet 49 | 50 | The authors have shared the final codebook (i.e., coding instructions) with us via email on June 6, 2024. 
51 | They provide further descriptions of the coding dimensions and label categories: 52 | 53 | - *Negative tone*: Attack or critique of opponent 54 | - 1: Presence of explicit attack or critique toward opponent 55 | - 0: No explicit attack or critique toward opponent 56 | - *Policy attack*: Attack or critique of a policy position of the opponent 57 | - 1: Presence of explicit attack or critique toward a policy proposition, political position, record once in office, ideas of the opponent 58 | - 0: No explicit attack or critique toward a policy proposition, political position, record once in office, ideas, of the opponent 59 | - *Personal attack*: Attack or critique of the character or persona of the opponent 60 | - 1: Presence of explicit attack or critique of the character, profile, personality, persona, figure, image, aspect, physical attributes of the opponent 61 | - 0: No explicit attack or critique of the character, profile, personality, persona, figure, image, aspect, physical attributes of the opponent 62 | - *Incivility*: Use of an uncivil language in the attack 63 | - 1: Use of a harsh, shrill, uncivil, offensive, vulgar language used in the attack (code only in the attack!) 
64 | - 0: Normal and civil language in the attack, without use of a harsh, shrill, uncivil, offensive, vulgar language used in the attack 65 | 66 | 67 | ## The data 68 | 69 | The raw data is available on OSF: https://osf.io/up826/files/osfstorage/5e3d6dc8f1369e01818acfa0 70 | 71 | 72 | ### Cleaned data 73 | 74 | Corresponding to the analysis, we provide the following CSV files: 75 | 76 | - identification of negative tone: 77 | "petkevic_political_2022-campaigntweets_negative_tones.csv" 78 | - column 'label' indicates the annotation: 1 when present, 0 when absent 79 | - column 'text' records the coded tweets 80 | - column 'metadata__row_number' indicates the original row number 81 | of each tweet before cleaning the data 82 | 83 | - identification of policy attack: 84 | "petkevic_political_2022-campaigntweets_policy_attacks.csv" 85 | - column 'label' indicates the annotation: 1 when present, 0 when absent 86 | - column 'text' records the coded tweets 87 | - column 'metadata__row_number' indicates the original row number 88 | of each tweet before cleaning the data 89 | 90 | - identification of personal attack: 91 | "petkevic_political_2022-campaigntweets_personal_attacks.csv" 92 | - column 'label' indicates the annotation: 1 when present, 0 when absent 93 | - column 'text' records the coded tweets 94 | - column 'metadata__row_number' indicates the original row number 95 | of each tweet before cleaning the data 96 | 97 | - identification of incivility: 98 | "petkevic_political_2022-campaigntweets_incivility.csv" 99 | - column 'label' indicates the annotation: 1 when present, 0 when absent 100 | - column 'text' records the coded tweets 101 | - column 'metadata__row_number' indicates the original row number 102 | of each tweet before cleaning the data 103 | 104 | ## Download data files 105 | 106 | | dataset_key | file | url | 107 | 
|:------------------------|:------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------| 108 | | petkevic_political_2022 | petkevic_political_2022-campaigntweets_incivility.csv | https://cta-text-datasets.s3.eu-central-1.amazonaws.com/labeled/petkevic_political_2022/petkevic_political_2022-campaigntweets_incivility.csv | 109 | | petkevic_political_2022 | petkevic_political_2022-campaigntweets_negative_tones.csv | https://cta-text-datasets.s3.eu-central-1.amazonaws.com/labeled/petkevic_political_2022/petkevic_political_2022-campaigntweets_negative_tones.csv | 110 | | petkevic_political_2022 | petkevic_political_2022-campaigntweets_personal_attacks.csv | https://cta-text-datasets.s3.eu-central-1.amazonaws.com/labeled/petkevic_political_2022/petkevic_political_2022-campaigntweets_personal_attacks.csv | 111 | | petkevic_political_2022 | petkevic_political_2022-campaigntweets_policy_attacks.csv | https://cta-text-datasets.s3.eu-central-1.amazonaws.com/labeled/petkevic_political_2022/petkevic_political_2022-campaigntweets_policy_attacks.csv | -------------------------------------------------------------------------------- /data/labeled/sylvester_parlee_2022/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Download data files 3 | 4 | | dataset_key | file | url | 5 | |:----------------------|:---------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------| 6 | | sylvester_parlee_2022 | cap_topic_codes.tsv | https://cta-text-datasets.s3.eu-central-1.amazonaws.com/labeled/sylvester_parlee_2022/cap_topic_codes.tsv | 7 | | sylvester_parlee_2022 | sylvester_parlee_2022-uk_cap_annotations.csv | 
https://cta-text-datasets.s3.eu-central-1.amazonaws.com/labeled/sylvester_parlee_2022/sylvester_parlee_2022-uk_cap_annotations.csv | -------------------------------------------------------------------------------- /data/misc/bank_sentences_with_senses.csv: -------------------------------------------------------------------------------- 1 | text,sense 2 | I deposited my paycheck at the bank.,financial 3 | She made a large withdrawal from the bank for her new business.,financial 4 | He saved money at the bank for a rainy day.,financial 5 | They went to the bank to get a loan for their new house.,financial 6 | She hoped the bank would approve her for a mortgage.,financial 7 | He applied for a job at the bank downtown.,financial 8 | The savings account at the bank earned very little interest.,financial 9 | The bank sent him a new debit card after his was stolen.,financial 10 | He kept all his savings in the bank for security.,financial 11 | He opened a joint account at the bank with his spouse.,financial 12 | She logged into her bank account to check her balance.,financial 13 | She transferred money from her savings to her checking account at the bank.,financial 14 | The bank offered him a low-interest loan for his new car.,financial 15 | He banked his hopes on getting the promotion at work.,financial 16 | She worked at the bank as a financial advisor.,financial 17 | He kept his emergency fund in the bank for easy access.,financial 18 | The bank gave her a loan to start her small business.,financial 19 | He banked his earnings to save for his future education.,financial 20 | She banked the money she earned from her part-time job.,financial 21 | The bank of computers processed the data quickly.,financial 22 | The river flowed swiftly past the steep bank.,geographical 23 | He took a seat on the bank of the lake to enjoy the view.,geographical 24 | "They walked along the bank, skipping stones across the water.",geographical 25 | A thick fog had settled on the river 
bank.,geographical 26 | The fisherman cast his line from the bank into the deep water.,geographical 27 | They built a bank of solar panels to generate electricity.,geographical 28 | "He leaned against the bank of sand, tired after the hike.",geographical 29 | "The kids played by the bank, collecting rocks and shells.",geographical 30 | The soldiers took cover behind a bank of dirt during the firefight.,geographical 31 | They swam across the river and climbed onto the opposite bank.,geographical 32 | The river's bank was eroding after the heavy rains.,geographical 33 | "He sat on the bank, watching the sunset over the water.",geographical 34 | They pulled the boat onto the bank and set up camp.,geographical 35 | The bank of fog made it hard to see the road.,geographical 36 | The rowers paddled close to the bank to avoid the strong current.,geographical 37 | The bank of the river was muddy after the rainstorm.,geographical 38 | The river's bank was lined with tall trees and wildflowers.,geographical 39 | The plane tilted as it made a sharp bank to the left.,motion 40 | The car skidded off the road and into the bank of snow.,motion 41 | The plane began to bank sharply as it prepared to land.,motion 42 | The hikers reached the top of the hill and saw a bank of fog below.,motion 43 | The plane banked steeply as it made its final approach.,motion 44 | The bank of dirt blocked the road after the landslide.,motion 45 | The pilot had to bank the plane to avoid turbulence.,motion 46 | The airplane banked left to avoid turbulence.,motion 47 | He needed to bank on his experience to solve the problem.,motion 48 | The plane banked over the city as it came in for landing.,motion 49 | The plane had to bank sharply to avoid the storm.,motion 50 | He banked his car around the sharp curve of the mountain road.,motion 51 | The thick bank of snow made it difficult to drive through the mountain pass.,motion 52 | She banked her car around the curve of the mountain road.,motion 53 | The 
"""Compute Krippendorff's alpha for the pledge *classification* annotations.

For every annotator group, the per-annotator CSV files are stacked, labels are
binarized (1 = "pledge", 0 = any other label) and nominal Krippendorff's alpha
is computed on the annotator-by-text rating matrix. The per-group results are
printed as a markdown table.
"""
from pathlib import Path
import pandas as pd
from scipy.stats import entropy
from krippendorff import alpha as k_alpha

data_path = "../../data/labeled/fornaciari_we_2021"
data_path = Path(data_path)

# ## Read the annotations
groups = ['group1', 'group2', 'group3']

k_alphas = {}

for group in groups:

    annotations_path = data_path / "annotations" / "classification" / group

    fps = list(annotations_path.glob('*.csv'))
    if not fps:
        # The data path is relative, so running from another working directory
        # finds nothing; fail with a message that says where we looked.
        raise FileNotFoundError(f"no annotation CSV files found in '{annotations_path}'")

    # one file per annotator: the file stem becomes the 'annotator' column
    annotations = pd.concat({fp.stem: pd.read_csv(fp) for fp in fps}, ignore_index=False).reset_index(level=0, names=['annotator'])
    tmp = annotations[['annotator', 'text_id', 'label']].copy()
    # binarize: 1 if the label is "pledge" (case-insensitive), else 0
    tmp['label'] = (tmp['label'].str.lower()=='pledge').astype(int)

    # Rows = annotators, columns = texts. Cells an annotator did not rate stay
    # NaN, which krippendorff.alpha treats as missing. Filling them with 0 would
    # invent "not pledge" ratings and distort the agreement estimate.
    # aggfunc='min' keeps the earlier handling of duplicate ratings (mean cast
    # to int): the cell is 1 only if every duplicate says "pledge".
    tmp = tmp.pivot_table(index='annotator', columns='text_id', values='label', aggfunc='min')
    k_alphas[group] = k_alpha(tmp.values, level_of_measurement='nominal')

print(pd.DataFrame.from_dict(k_alphas, orient='index', columns=['k_alpha']).round(3).to_markdown())
def character_to_token_spans(text: str, spans: list[tuple[int, int]], span_tokenizer=None) -> list[tuple[int, int]]:
    """Convert character-level spans of *text* into token-level spans.

    Args:
        text: The text the character offsets refer to.
        spans: ``(start_char, end_char)`` pairs to convert.
        span_tokenizer: Optional object with a ``span_tokenize(text)`` method
            that yields ``(start, end)`` character offsets per token. When
            omitted, the module-level ``tokenizer`` is used, so existing
            two-argument calls behave as before.

    Returns:
        One ``(start_token, end_token)`` pair per convertible span, with
        ``end_token`` exclusive. ``start_token`` is the first token starting at
        or after ``start_char``; the last included token is the last one ending
        at or before ``end_char``. Spans for which no such pair of tokens
        exists (e.g. a span lying strictly inside one token) are dropped.
    """
    active_tokenizer = tokenizer if span_tokenizer is None else span_tokenizer
    token_spans = list(active_tokenizer.span_tokenize(text))

    # Build both scan orders once instead of once per span.
    indexed = list(enumerate(token_spans))
    indexed_reversed = indexed[::-1]

    token_span_list = []
    for start_char, end_char in spans:
        # first token that starts at or after start_char
        start_token = next((i for i, (s, _) in indexed if s >= start_char), None)
        # last token that ends at or before end_char
        end_token = next((i for i, (_, e) in indexed_reversed if e <= end_char), None)
        if start_token is not None and end_token is not None and start_token <= end_token:
            token_span_list.append((start_token, end_token + 1))  # +1 to make it exclusive
    return token_span_list
# Main flow: compute Krippendorff's alpha for the pledge *extraction* (span)
# annotations, separately for each annotator group, and print the results as a
# markdown table.
data_path = "../../data/labeled/fornaciari_we_2021"
data_path = Path(data_path)

# maps group name -> alpha value returned by the agreement object
k_alphas = {}
for group in ['group1', 'group2']:
    annotations_path = data_path / "annotations" / "extraction" / group

    # one JSON-lines file per annotator
    fps = list(annotations_path.glob('*.jsonl'))

    # stack all files; the file stem (dict key) becomes the 'annotator' column
    annotations = pd.concat({fp.stem: pd.read_json(fp, lines=True) for fp in fps}, ignore_index=False).reset_index(level=0, names=['annotator'])

    # discard entity type: keep only the first two elements of each annotation
    # (presumably the start/end character offsets -- they are used as slice
    # bounds into 'text' below)
    annotations['label'] = annotations.label.apply(lambda x: [anno[:2] for anno in x])

    # add metadata and reformat the DataFrame
    if 'metadata' in annotations.columns:
        # expand the per-row metadata records into separate columns
        metadata = annotations['metadata'].apply(pd.Series)
        # drop the metadata's own 'label' field so it does not overwrite the
        # annotation labels; NOTE(review): raises KeyError if that field is absent
        metadata.drop(columns=['label'], inplace=True)
        annotations[metadata.columns] = metadata
        annotations.drop(columns=['metadata'], inplace=True)

    annotations = annotations.sort_values(by=['text_id', 'annotator']).reset_index(drop=True)
    annotations = annotations[['text_id', 'text', 'annotator', 'label']]
    # the annotated text snippets, sliced out of 'text' by the label offsets
    annotations['spans'] = annotations.apply(lambda x: [x['text'][lab[0]:lab[1]] for lab in x['label']], axis=1)

    # count number of annotations per annotator and text
    annotations['n_annos'] = annotations.label.map(len)
    annotations['no_annos'] = annotations['n_annos']==0

    # Token-level view of the same annotations:
    # extract tokens
    annotations['tokens'] = annotations.apply(lambda x: tokenizer.tokenize(x['text']), axis=1)
    # determine token-level span information
    annotations['token_spans'] = annotations.apply(lambda x: character_to_token_spans(x['text'], x['label']), axis=1)

    # Braylan et al.'s code requires that annotators and items are identified by integer IDs
    annotations['item_id'] = pd.Categorical(annotations['text_id']).codes
    annotations['annotator_id'] = pd.Categorical(annotations['annotator']).codes
    # create an agreement object with the entity distance function
    # NOTE(review): agreement is computed on the character-level 'label' column;
    # the 'token_spans' column built above is not passed here -- confirm that
    # this is intended.
    iaa = InterAnnotatorAgreement(
        annotations,
        item_colname="item_id",
        uid_colname="annotator_id",
        label_colname="label",
        distance_fn=overlap_distance
    )
    # compute agreement measures
    iaa.setup(parallel_calc=False)
    k_alphas[group] = iaa.get_krippendorff_alpha()

# one row per group, alpha rounded to three decimals
print(pd.DataFrame.from_dict(k_alphas, orient='index', columns=['k_alpha']).round(3).to_markdown())
If you know how to create virtual environments and how to install requirements, you can directly install them from the [requirements.txt](./requirements.txt) file 12 | 13 | ## Clone the github repository 14 | 15 | Follow the instructions in [setup_github_clone.md](./setup_github_clone.md) 16 | 17 | ## Google Colab for running notebooks online 18 | 19 | Follow the instructions in [setup_google_colab.md](./setup_google_colab.md) 20 | 21 | ## Accounts and software for using open-source LLMs 22 | 23 | - Install `ollama` following the instructions in [setup_ollama.md](./setup_ollama.md) 24 | 25 | 26 | ## Accounts to use commercial LLMs 27 | 28 | Follow the instructions in [setup_openai.md](./setup_openai.md) 29 | 30 | 31 | -------------------------------------------------------------------------------- /setup/imgs/openai_billing_configure_payment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haukelicht/advanced_text_analysis/5d95688b78ba1a99e95ca040d670cb35634f8f94/setup/imgs/openai_billing_configure_payment.png -------------------------------------------------------------------------------- /setup/imgs/openai_billing_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haukelicht/advanced_text_analysis/5d95688b78ba1a99e95ca040d670cb35634f8f94/setup/imgs/openai_billing_overview.png -------------------------------------------------------------------------------- /setup/imgs/openai_key_create_new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haukelicht/advanced_text_analysis/5d95688b78ba1a99e95ca040d670cb35634f8f94/setup/imgs/openai_key_create_new.png -------------------------------------------------------------------------------- /setup/imgs/openai_project_create_new.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/haukelicht/advanced_text_analysis/5d95688b78ba1a99e95ca040d670cb35634f8f94/setup/imgs/openai_project_create_new.png -------------------------------------------------------------------------------- /setup/imgs/vscode_python_environments_popup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haukelicht/advanced_text_analysis/5d95688b78ba1a99e95ca040d670cb35634f8f94/setup/imgs/vscode_python_environments_popup.png -------------------------------------------------------------------------------- /setup/requirements.txt: -------------------------------------------------------------------------------- 1 | # data wrangling 2 | numpy~=1.26.0 3 | scipy~=1.10.1 4 | pillow~=10.4.0 5 | pandas~=2.2.2 6 | # basic text processing 7 | regex~=2025.9.1 8 | nltk==3.9.1 9 | gensim~=4.3.0 10 | krippendorff==0.8.1 11 | # machine learning basics 12 | scikit-learn~=1.7.2 13 | statsmodels~=0.14.5 14 | # visualization 15 | seaborn~=0.13.2 16 | matplotlib~=3.10.6 17 | # transformers finetuning 18 | torch~=2.8.0 19 | huggingface-hub~=0.34.4 20 | datasets==4.0.0 21 | tokenizers~=0.22.0 22 | sentencepiece~=0.2.1 23 | protobuf~=5.29.5 24 | accelerate==1.10.1 25 | bitsandbytes~=0.42.0 26 | transformers~=4.56.1 27 | bertviz==1.4.1 28 | sentence-transformers~=5.1.0 29 | trl~=0.23.0 30 | seqeval~=1.2.2 31 | # neural topic modeling 32 | umap-learn~=0.5.9.post2 33 | hdbscan==0.8.40 34 | bertopic==0.17.3 35 | turftopic~=0.17.5 36 | # LLMs 37 | jinja2==3.1.6 38 | pydantic~=2.11.7 39 | outlines~=1.2.4 40 | ollama~=0.5.3 41 | openai~=1.107.0 42 | tiktoken~=0.11.0 43 | # misc 44 | pyyaml~=6.0.2 45 | python-dotenv~=1.1.1 46 | tqdm~=4.67.1 47 | # notebooks 48 | jupyter #~=1.1.1 49 | jupyter-client #~=8.6.3 50 | nbformat #~=5.10.4 51 | notebook #~=7.4.5 52 | ipython #~=9.5.0 53 | ipykernel #~=6.30.1 54 | ipywidgets #~=8.1.7 55 | -------------------------------------------------------------------------------- 
/setup/setup_github_clone.md: -------------------------------------------------------------------------------- 1 | # Pull a GitHub Repository 2 | 3 | ## About GitHub 4 | 5 | In our course, we will use [*GitHub*](https://github.com/), a platform for hosting and sharing code. 6 | GitHub allows you to download ("clone") repositories (folders with code, data, and documentation) directly to your computer. 7 | You can then work with these files locally and keep them up to date. 8 | 9 | ## Setup instructions 10 | 11 | To get started with GitHub, you need to clone the repository once to your computer. 12 | There are two main ways to do this: 13 | 14 | 1. using the command line (Anaconda Prompt on Windows, Terminal on macOS) 15 | 2. using the GitHub Desktop app 16 | 17 | After that, you can pull updates whenever something changes in the repository. 18 | 19 | 20 | ### 1. Cloning with the Command Line 21 | 22 | #### _otherwise_, on Windows 23 | 24 | - Open the **Anaconda Prompt** (installed with Anaconda). 25 | ![Open Anaconda Prompt](./screenshots/windows_anaconda_prompt.png) 26 | 27 | - Navigate to a folder where you want to store the repository (e.g., your `Documents` folder): 28 | 29 | ```shell 30 | cd %USERPROFILE%\Documents 31 | ``` 32 | 33 | - Clone the repository using its HTTPS link: 34 | 35 | ```shell 36 | git clone https://github.com/haukelicht/advanced_text_analysis.git 37 | ``` 38 | 39 | #### _otherwise_. on macOS 40 | 41 | - Open the **Terminal** app. 42 | ![Open Terminal](./screenshots/macos_terminal.png) 43 | 44 | - Navigate to a folder where you want to store the repository (e.g., your `Documents` folder): 45 | 46 | ```shell 47 | cd ~/Documents 48 | ``` 49 | 50 | - Clone the repository using its HTTPS link: 51 | 52 | ```shell 53 | git clone https://github.com/haukelicht/advanced_text_analysis.git 54 | ``` 55 | 56 | *Notes:* 57 | 58 | - Make sure `git` is installed. 
On macOS, you may be asked to install Xcode command line tools when running `git` the first time. On Windows, `git` comes bundled with Anaconda, so you are ready to go. 59 | - It's best to keep all course-related repositories in a dedicated folder (e.g., `Documents/CourseRepos`). 60 | 61 | 62 | ### 2. Cloning with GitHub Desktop 63 | 64 | If you prefer a graphical interface: 65 | 66 | 1. Install [GitHub Desktop](https://desktop.github.com/). 67 | 2. Open the app and log in with your GitHub account. 68 | 3. Go to **File > Clone Repository**. 69 | 4. Paste the repository's HTTPS link: https://github.com/haukelicht/advanced_text_analysis.git 70 | 5. Select a local folder (e.g., `Documents/advanced_text_analysis`) where you want to save it. 71 | 6. Click **Clone**. 72 | 73 | 74 | ### 3. Pulling Updates 75 | 76 | Once the repository is cloned, you don't need to clone it again. 77 | Instead, update it with the latest changes: 78 | 79 | #### **Command line (Windows/macOS):** 80 | 81 | ```shell 82 | cd path/to/repository 83 | git pull 84 | ``` 85 | 86 | #### **GitHub Desktop:** 87 | 88 | Click **Fetch origin** → **Pull origin** in the top bar. 89 | 90 | 91 | ### 4. Important Notes 92 | 93 | - Always `git pull` before starting work, to make sure you have the latest version. 94 | - Do **not** edit files in the repository unless you are asked to. Changes may be overwritten when pulling updates. 95 | - If you accidentally make edits, keep a backup copy outside the repository folder. 96 | - If you see error messages when pulling, contact the course instructor. 
97 | -------------------------------------------------------------------------------- /setup/setup_google_colab.md: -------------------------------------------------------------------------------- 1 | # Setup Google Colab 2 | 3 | Please sign up with Google so that you can use Google Colab: https://colab.google/ 4 | 5 | If you have never worked with Google Colab before, please watch this short introduction video: https://youtu.be/RLYoEyIHL6A?si=yOohEqaqvdvSdKdV 6 | 7 | 8 | 9 | **_Optional:_** 10 | Consider investing $ 9,99 (11,01 €) per month for a _Colab Pro_. 11 | This will allow you to use more and better GPU resources. 12 | And you can cancel your subscription monthly at any time. 13 | -------------------------------------------------------------------------------- /setup/setup_macos.md: -------------------------------------------------------------------------------- 1 | # Python setup with conda 2 | 3 | To follow the setup instructions below, you'll need to work in the Terminal app. 4 | You can open it in three different ways 5 | 6 | 1. press "Command" (the ⌘ key) + "White space" (the spacebar), type "Terminal" in the search field, and hit Enter 7 | 2. open the Launchpad, type "Terminal" in the search field, then click Terminal 8 | 3. in the Finder, open the /Applications/Utilities folder, then double-click "Terminal.app" 9 | 10 | see https://support.apple.com/en-gb/guide/terminal/apd5265185d-f365-44cb-8b09-71a064a42125/mac 11 | 12 | ## Installation with homebrew 13 | 14 | *Homebrew* is a package manager for MacOS. 
15 | 16 | ### Install homebrew 17 | 18 | Please follow the instructions here: https://docs.brew.sh/Installation 19 | 20 | ### Install python through homebrew 21 | 22 | Once you have installed homebrew, you can use it to install python (version 3.11): 23 | 24 | ```shell 25 | brew install python@3.11 26 | whereis python 27 | python --version 28 | ``` 29 | 30 | ### Install conda through miniforge 31 | 32 | Next, install *miniforge*, a minimal installer for Conda (see https://github.com/conda-forge/miniforge): 33 | 34 | ```shell 35 | brew install --cask miniforge 36 | ``` 37 | 38 | ## Installation without homebrew 39 | 40 | We recommend installation and setup through homebrew. 41 | 42 | But if you do *not* want to (or cannot) use homebrew, you can manually install 43 | 44 | - *miniforge* from https://github.com/conda-forge/miniforge#miniforge3, or 45 | - *Anaconda* as described [here](https://www.anaconda.com/download/) and [here](https://docs.conda.io/projects/conda/en/latest/user-guide/install/macos.html) 46 | 47 | 48 | ## Create a new conda environment 49 | 50 | First check if your Mac uses Apple silicon instead of an Intel processor: https://support.apple.com/ 51 | 52 | ### macOS (_without_ Apple silicon) 53 | 54 | ```shell 55 | # create 56 | conda create -y -n advanced_text_analysis_gesis python=3.11 pip 57 | 58 | # activate 59 | conda activate advanced_text_analysis_gesis 60 | 61 | # verify python and pip versions and paths 62 | python --version 63 | which python # <== should contain 'miniforge3/envs/advanced_text_analysis_gesis/bin' 64 | 65 | pip --version 66 | which pip # <== should contain 'miniforge3/envs/advanced_text_analysis_gesis/bin' 67 | ``` 68 | 69 | 70 | ### macOS with Apple silicon (ARM) 71 | 72 | source: https://towardsdatascience.com/python-conda-environments-for-both-arm64-and-x86-64-on-m1-apple-silicon-147b943ffa55 73 | 74 | ```shell 75 | # create 76 | CONDA_SUBDIR=osx-arm64 conda create -y -n
advanced_text_analysis_gesis python=3.11 pip 77 | 78 | # activate 79 | conda activate advanced_text_analysis_gesis 80 | 81 | # verify python and pip versions and paths 82 | python --version 83 | which python # <== should contain 'miniforge3/envs/advanced_text_analysis_gesis/bin' 84 | 85 | pip --version 86 | which pip # <== should contain 'miniforge3/envs/advanced_text_analysis_gesis/bin' 87 | ``` 88 | 89 | 90 | ## Install required python packages 91 | 92 | It's best to first check if there would be any version conflicts: 93 | 94 | ```shell 95 | pip install --dry-run --ignore-installed -r https://raw.githubusercontent.com/haukelicht/advanced_text_analysis/main/setup/requirements.txt 96 | ``` 97 | 98 | If so, report to hauke.licht@uibk.ac.at. 99 | 100 | If not, install the packages: 101 | 102 | ```shell 103 | # install all required packages in the correct versions 104 | pip install -r https://raw.githubusercontent.com/haukelicht/advanced_text_analysis/main/setup/requirements.txt 105 | ``` 106 | 107 | 108 | ### Only for macOS with Apple silicon (M1/M2/... chip): Check that `torch` can use the chip 109 | 110 | *Note:* if your Mac has Apple silicon (M1/M2/... chip), you need to have at least macOS 12.3 (Monterey) installed 111 | 112 | ```shell 113 | sw_vers | grep ProductVersion 114 | ``` 115 | 116 | If not, update your operating system. 117 | 118 | Next, you can check that Apple silicon (M1/M2/... chip) is available to `torch`: 119 | 120 | ```shell 121 | python -c 'import torch.backends.mps as mps; print(mps.is_available())' # <== should be True 122 | ``` 123 | 124 | 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /setup/setup_ollama.md: -------------------------------------------------------------------------------- 1 | # Install and setup `ollama` 2 | 3 | We will use `ollama` to interact with open-source LLMs.
4 | Please install `ollama` from here: https://ollama.com/download 5 | 6 | **Note** — You will need macOS 11 Big Sur (or later) or Windows 10 (or later). 7 | 8 | ### Using `ollama` in python 9 | 10 | Run the code in notebook [test_ollama.ipynb](./test_ollama.ipynb) to verify that you can access the ollama API on your computer through python 11 | -------------------------------------------------------------------------------- /setup/setup_openai.md: -------------------------------------------------------------------------------- 1 | # OpenAI account and API access 2 | 3 | ## 1. Create an OpenAI account 4 | 5 | Go to [OpenAI](https://auth.openai.com/authorize) and sign in or create an account. 6 | 7 | ## 2. Link a payment method and book some credit 8 | 9 | 1. go to https://platform.openai.com/settings/organization/general 10 | 2. In the top-left of the page-header, select "Personal" as your Organization 11 | 3. **optional:** create a new project "advanced_text_analysis_gesis" as shown below. 12 | 4. Go to Billing in the menu on the left (under "**Organization**") 13 | 5. Click on "Add payment details" and link a credit card 14 | 6. In the *Configure payment* pop-up (or by clicking "Add to credit balance"), add some credit, e.g., U.S.$ 10 15 | 16 | 17 | Step 2.3 18 | 19 | Step 2.4 and 2.5 20 | 21 | Step 2.6 22 | 23 | 24 | ## 3. Get your API key 25 | 26 | 1. Go to the [API keys](https://platform.openai.com/api-keys) page. 27 | 2. Create a new API key by clicking on the "Create new secret key" button. 28 | 3. Fill your information in the form shown below and click on the "Create secret key" button. 29 | 30 | Step 3.3 31 | 32 | **IMPORTANT** — 33 | Make sure you **take the next step** to store the API key in a safe place. 34 | This will be the only time you will be able to see your API key. 35 | 36 | ## 4. Make your API key accessible in VS Code 37 | 38 | 1. Create a file called `.env` in the root of your project folder.
(It's important that the file name starts with a dot!) 39 | 2. Open it in a text editor 40 | 3. Now, 41 | 1. add `OPENAI_API_KEY=` in the first line of the file, 42 | 2. copy your API key from your browser window, 43 | 3. paste the key in your `.env` file directly after the `=` 44 | 4. save the file and close it 45 | 46 | ## 5. Verify that your API key is accessible in python 47 | 48 | Run the code in notebook [test_openai_key.ipynb](./test_openai_key.ipynb) to verify that your API key is accessible in VS Code and your API access works. 49 | -------------------------------------------------------------------------------- /setup/setup_python_with_anaconda.md: -------------------------------------------------------------------------------- 1 | # Setup Python with Anaconda 2 | 3 | ## About Anaconda 4 | 5 | In the course we will use [*Anaconda*](https://www.anaconda.com/). 6 | On [Wikipedia](https://en.wikipedia.org/wiki/Anaconda_(Python_distribution)), Anaconda is described as a 7 | 8 | > ... distribution of the Python and R programming languages for scientific computing, that aims to simplify package management and deployment. 9 | > The distribution includes data-science packages suitable for Windows, Linux, and macOS. 10 | 11 | If you [ask ChatGPT](https://chat.openai.com/share/958fe6cc-b411-43e5-b156-23fb6ef4fb3f) why and how Anaconda is useful for teaching data science-related topics, you'll learn -- among other things -- that this is because 12 | 13 | > 1. ... Anaconda comes with a powerful package manager called Conda ... [which] allows users to easily install, update, and manage various data science libraries and tools. 14 | > 2. ... Anaconda is available for Windows, macOS, and Linux, making it suitable for a diverse range of students using different operating systems. 15 | > 4. ... Anaconda enables the creation of isolated virtual environments ... [which] [allow] students to work on different projects with different dependencies without interference. 16 | > 5.
... Anaconda includes Jupyter Notebook, ... [which] are widely used ... for creating interactive and shareable documents that combine code, explanations, and visualizations. 17 | > 6. ... Anaconda, Inc. offers educational resources and tutorials specifically designed for teachers and students. 18 | 19 | ## Setup instructions 20 | 21 | To set up Anaconda for our course, you need to take four steps: 22 | 23 | 1. install Anaconda 24 | 2. verify your Anaconda installation (python and conda should be ready to go) 25 | 3. create a new conda environment 26 | 4. install required packages in your conda environment 27 | 28 | Let's take these steps in turn! 29 | 30 | **_Note:_** If you encounter any issues, email me (hauke.licht@uibk.ac.at) or [post an issue](https://github.com/haukelicht/advanced_text_analysis/issues). 31 | 32 | ### 1. Install Anaconda 33 | 34 | We recommend manual installation. 35 | For this, you need to download the installer for your operating system (Windows or macOS), and then run the installer to configure Anaconda for use on your computer. 36 | 37 | #### Windows 38 | 39 | Download the Installer from https://www.anaconda.com/download/success#windows 40 | 41 | With the installer downloaded, the following two links provide detailed follow-up instructions: 42 | 43 | - https://www.datacamp.com/tutorial/installing-anaconda-windows 44 | - https://www.anaconda.com/docs/getting-started/anaconda/install#windows-installation 45 | 46 | *Notes:* 47 | 48 | 1. The screenshots in the first link are a little outdated, but the instructions remain valid. 49 | 2.
In step six described in the first link, please opt for the "Alternative Approach" (i.e., automatically add Anaconda to your PATH variable at installation time) 50 | 51 | #### MacOS/Linux 52 | 53 | Download the Installer from https://www.anaconda.com/download/success#mac or 54 | follow the instructions in ['setup_macos.md'](./setup_macos.md) 55 | 56 | *Notes:* 57 | 58 | - If your computer has Apple silicon (i.e., an M1/M2/... chip), choose "64-bit (Apple silicon) Graphical Installer". Otherwise, choose "64-bit (Intel chip) Graphical Installer". 59 | - If you *don't know* whether your Mac has Apple silicon, follow these instructions to find out: https://www.howtogeek.com/706226/how-to-check-if-your-mac-is-using-an-intel-or-apple-silicon-processor 60 | 61 | With the installer downloaded, the following two links provide detailed follow-up instructions: 62 | 63 | - https://www.datacamp.com/tutorial/installing-anaconda-mac-os-x 64 | - https://www.anaconda.com/docs/getting-started/anaconda/install#macos-linux-installation 65 | 66 | **_Important note:_** When asked to select the destination of the installation (step 4 in the second link), please choose "Install for me only" (e.g., use your Applications folder) 67 | 68 | ### 2. Verify your Anaconda installation 69 | 70 | Follow the instructions here: 71 | 72 | - Windows: https://www.anaconda.com/docs/getting-started/anaconda/install#windows-installation:how-do-i-verify-my-installers-integrity 73 | - macOS/Linux: https://www.anaconda.com/docs/getting-started/anaconda/install#macos-linux-installation:how-do-i-verify-my-installers-integrity 74 | 75 | ### 3. Create a new conda environment 76 | 77 | #### Using the Anaconda Navigator app 78 | 79 | The Anaconda Navigator app should have opened when you finished your Anaconda installation. 80 | If not, open it from your applications (see [here](https://docs.anaconda.com/free/navigator/getting-started/#navigator-starting-navigator) for instructions).
81 | 82 | Then, follow the instructions [here](https://docs.anaconda.com/free/navigator/tutorials/create-python35-environment/), taking into account the following notes: 83 | 84 | - in step 4, use 'advanced_text_analysis_gesis_2025' as environment name 85 | - in step 5, use the python version that starts with '3.11' (or higher) 86 | - in step 7, choose "open Terminal" and go to the next step of our setup process 87 | 88 | #### Using the command line 89 | 90 | - on Windows: Open the Anaconda Prompt 91 | - on macOS: Open the Terminal app 92 | 93 | To create a new conda environment, run the following lines: 94 | 95 | **_Note:_** If you are a Mac user and your MacBook has an M1, M2, or M3 chip, put `CONDA_SUBDIR=osx-arm64` in front of `conda create` when running the code below 96 | 97 | ```shell 98 | conda create --name advanced_text_analysis_gesis_2025 python=3.11 pip 99 | 100 | conda activate advanced_text_analysis_gesis_2025 101 | ``` 102 | 103 | 104 | - The part after `--name` is the name of the environment. So our new environment is called 'advanced_text_analysis_gesis_2025' 105 | - `python=3.11` specifies that we want to use python version 3.11 in this environment 106 | - `pip` specifies that we want to pre-install pip 107 | 108 | ### 4. Install required packages 109 | 110 | It's best to first check if there would be any version conflicts. 111 | 112 | So first, run the following command in the Anaconda Prompt (Windows)/Terminal (macOS): 113 | 114 | ```shell 115 | pip install --dry-run --ignore-installed -r https://raw.githubusercontent.com/haukelicht/advanced_text_analysis/main/setup/requirements.txt 116 | ``` 117 | 118 | If this raises any error messages, report them to hauke.licht@uibk.ac.at.
119 | 120 | If not, install the packages by running the following command in the Anaconda Prompt (Windows)/Terminal (macOS): 121 | 122 | ```shell 123 | # install all required packages in the correct versions 124 | pip install -r https://raw.githubusercontent.com/haukelicht/advanced_text_analysis/main/setup/requirements.txt 125 | ``` 126 | 127 | ## Errors and issues 128 | 129 | If you encounter any issues, [post an issue](https://github.com/haukelicht/advanced_text_analysis/issues) or email Hauke (via hauke.licht@uibk.ac.at). 130 | -------------------------------------------------------------------------------- /setup/setup_vs_code.md: -------------------------------------------------------------------------------- 1 | # Setting up VS Code 2 | 3 | We will be using _Visual Studio Code_ (VS Code) as a code editor in the workshop. 4 | Please install **VS Code** _before_ the course from https://code.visualstudio.com/Download. 5 | 6 | **_Note for Windows users:_** Some python packages we use (e.g., numpy) require a working C++ installation. Please install C++ for VS Code from [here](https://code.visualstudio.com/docs/languages/cpp) as described [here](https://code.visualstudio.com/docs/cpp/config-msvc). 7 | 8 | You will also need to install **Python** and the **conda** package manager. 9 | If you have not done so before, please refer to the setup instructions in [setup_python_with_anaconda.md](./setup_python_with_anaconda.md). 10 | Alternatively, you can download and install 11 | 12 | - Python as described [here](https://www.python.org/downloads) 13 | - and conda as described [here](https://conda.io/projects/conda/en/latest/user-guide/install) 14 | 15 | Finally, in VS Code, you also need to install the Python and Jupyter **extensions**. 16 | You can do this by clicking on the "Extensions" icon in the left-hand sidebar, searching for "Python" ("Jupyter") and clicking on the "Install" button.
17 | 18 | You shouldn't run into any issues if you have admin rights on your computer. 19 | But if you run into difficulties, please check https://code.visualstudio.com/docs/languages/python. 20 | If you don't find a solution, you can also email Hauke at hauke.licht@uibk.ac.at. 21 | 22 | 23 | ### Conda environment 24 | 25 | **_Note:_** You can skip this step if you have already completed steps 3 and 4 in [setup_python_with_anaconda.md](./setup_python_with_anaconda.md). 26 | 27 | To ensure that everyone uses the same python and package versions, we will create and use a virtual conda environment. 28 | For this, you'll need to open 29 | 30 | - the *Anaconda Prompt* app if you are a Windows user, **_or_** 31 | - the *Terminal* app if you are a Mac user 32 | 33 | In the Anaconda Prompt/Terminal, execute the following lines (by copy-pasting them there and pressing Enter): 34 | 35 | ```bash 36 | conda create -n advanced_text_analysis_gesis_2025 -y python=3.11 pip 37 | conda activate advanced_text_analysis_gesis_2025 38 | pip install -r https://raw.githubusercontent.com/haukelicht/advanced_text_analysis/main/setup/requirements.txt 39 | ``` 40 | 41 | ### Selecting the conda environment in VS Code 42 | 43 | When running some python script or a cell in a Jupyter notebook in VS Code, you will be prompted to select the python interpreter. 44 | 45 | Pop-up in VS Code for selecting a python interpreter 46 | 47 | In our case, we will always select the `advanced_text_analysis_gesis_2025` environment. 48 | 49 | 50 | **_Alternatives_** 51 | 52 | - create a native python virtual environment (like [this](https://realpython.com/lessons/creating-virtual-environment/)), and/or 53 | - install the required python packages listed in the [requirements.txt](./requirements.txt) file manually 54 | 55 | ### Setup the course repo as a project in VS Code 56 | 57 | 1. If you have not done so yet, clone the github repository as described in [setup_github_clone.md](./setup_github_clone.md) 58 | 2.
Open the VS Code app 59 | 60 | 3. Select the Open (folder) option 61 | 4. Open the folder named `advanced_text_analysis` you have created by cloning the github repo 62 | 63 | This should open the folder as a project in your VS Code app. 64 | 65 | ### Make local code in `src/` available in Jupyter notebooks 66 | 67 | 1. In VS Code, open an integrated terminal via the Menu (Terminal > New Terminal). The Terminal window opens in the center-bottom of the app window. 68 | 2. In the Terminal, type: `pwd` and hit enter 69 | - On macOS, this should print a line starting with `/Users/` (on Linux, typically `/home/`). 70 | - On Windows, this should print a line starting with `C:\` (or another capital letter at the beginning, like `D`) 71 | 3. Copy this line 72 | 4. Create a file called `.env` in the root of your project folder by selecting File > New Text File in the menu. (It's important that the file name starts with a dot!) 73 | 5. Open the file in VS Code. 74 | 6. Now, 75 | 1. add a new line to the file 76 | 2. begin the line with `PYTHONPATH="` 77 | 3. paste the line you have copied in step (3) above behind the `"` (without a white space) 78 | 4. on 79 | - macOS/Linux, add `:${PYTHONPATH}` 80 | - Windows, add ` ${PYTHONPATH}` (with a white space at the beginning) 81 | 5. end the line with `"` 82 | 6. In the menu, select File > Save 83 | 84 | 85 | #### Desired result 86 | 87 | The complete line you added to your `.env` file should look like this: 88 | 89 | ##### on macOS/Linux 90 | 91 | ``` 92 | PYTHONPATH="/Users/hauke/courses/advanced_text_analysis:${PYTHONPATH}" 93 | ``` 94 | 95 | *note:* the part `/Users/hauke/courses/` will be different for you, because you have the folder in a different location on your machine. 96 | 97 | ##### on Windows 98 | 99 | ``` 100 | PYTHONPATH="C:\Users\Hauke\Courses\advanced_text_analysis ${PYTHONPATH}" 101 | ``` 102 | 103 | *note:* the part `C:\Users\Hauke\Courses\` will be different for you, because you have the folder in a different location on your machine.
104 | -------------------------------------------------------------------------------- /setup/test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "c38d025b", 6 | "metadata": {}, 7 | "source": [ 8 | "## Test python setup" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "f226d2b1", 14 | "metadata": {}, 15 | "source": [ 16 | "When you open this notebook the first time in your VS Code session, click on \"Select Kernel\" in the top right, and choose `advanced_text_analysis_gesis_2025` from the dropdown list shown when clicking on \"Python Environments\" (click \"Select Another Kernel ...\" if you can't see \"Python Environments\" yet)." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 26, 22 | "id": "ff18e4f9", 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "name": "stdout", 27 | "output_type": "stream", 28 | "text": [ 29 | "python environment: advanced_text_analysis_gesis_2025\n", 30 | "python version: 3.12.11 | packaged by conda-forge | (main, Jun 4 2025, 14:38:53) [Clang 18.1.8 ]\n", 31 | "platform: macosx-11.0-arm64\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "import sys, sysconfig\n", 37 | "from pathlib import Path\n", 38 | "print('python environment:', Path(sys.executable).parent.parent.name)\n", 39 | "print('python version:', sys.version)\n", 40 | "print('platform:', sysconfig.get_platform())" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "id": "22d00126", 46 | "metadata": {}, 47 | "source": [ 48 | "**_Note:_**\n", 49 | "The first row should list `advanced_text_analysis_gesis_2025` as the python environment. \n", 50 | "If not, please make sure you have selected the correct kernel in VS Code (see below)."
51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "id": "1efc3b78", 56 | "metadata": {}, 57 | "source": [ 58 | "### checks" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "id": "f134d7e9", 64 | "metadata": {}, 65 | "source": [ 66 | "#### 1. required packages are installed with correct versions" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "id": "870fa3cd", 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "No broken requirements found.\n" 80 | ] 81 | } 82 | ], 83 | "source": [ 84 | "!pip check" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "id": "9a19435f", 90 | "metadata": {}, 91 | "source": [ 92 | "**_Note:_** \n", 93 | "This should print `No broken requirements found.`\n", 94 | "If not, uncomment the code below and run it:" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "id": "81355f98", 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "#!pip install -r requirements.txt" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "id": "a0559616", 110 | "metadata": {}, 111 | "source": [ 112 | "#### 2.
important package imports" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 4, 118 | "id": "3b93171f", 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "import numpy" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 5, 128 | "id": "098a6359", 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "import scipy" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 1, 138 | "id": "9d1d4dde", 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "import gensim" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 2, 148 | "id": "7dc321c6", 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "import nltk" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 6, 158 | "id": "eee9bedf", 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "import sklearn" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 7, 168 | "id": "b4995a5e", 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "name": "stderr", 173 | "output_type": "stream", 174 | "text": [ 175 | "/Users/hlicht/miniforge3/envs/advanced_text_analysis_gesis_2025/lib/python3.11/site-packages/turftopic/serialization.py:7: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. 
# NOTE(review): the next comment line preserves, verbatim, text from this dump that is not a
# complete Python definition: the tail of /setup/test.ipynb followed by the end of a metrics
# function whose header is missing from the dump (extraction garbling at "[00:00 Dict[...").
# Refrain from using this package or pin to Setuptools<81.\n", 176 | " import pkg_resources\n" 177 | ] 178 | } 179 | ], 180 | "source": [ 181 | "import turftopic" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 3, 187 | "id": "ee170a94", 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "import transformers" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 9, 197 | "id": "b457ed48", 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "import ollama, openai" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "id": "01b83ecd", 207 | "metadata": {}, 208 | "source": [ 209 | "#### 3. `tqdm` runs with `jupyter` and `ipywidget`" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 1, 215 | "id": "261c54cc", 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "data": { 220 | "application/vnd.jupyter.widget-view+json": { 221 | "model_id": "58e9829e125d456292f2c3cea71ff88b", 222 | "version_major": 2, 223 | "version_minor": 0 224 | }, 225 | "text/plain": [ 226 | " 0%| | 0/5 [00:00 Dict[str, float]: 22 | 23 | with warnings.catch_warnings(): 24 | warnings.filterwarnings('ignore') 25 | # metrics 26 | precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='binary', zero_division=0.0) 27 | acc_balanced = balanced_accuracy_score(y_true, y_pred) 28 | acc_not_balanced = accuracy_score(y_true, y_pred) 29 | 30 | result = { 31 | 'accuracy': acc_not_balanced, 32 | 'accuracy_balanced': acc_balanced, 33 | 'f1': f1, 34 | 'precision': precision, 35 | 'recall': recall, 36 | } 37 | 38 | return result 39 |

def compute_sequence_classification_metrics_multiclass(
        y_true: List[List[int]],
        y_pred: List[List[int]],
        label2id: Dict[str, int]
    ) -> Dict[str, float]:
    """Compute overall and per-class metrics for multiclass predictions.

    Args:
        y_true: Gold label IDs.
        y_pred: Predicted label IDs.
        label2id: Mapping from label name to label ID. The values select the
            classes that are scored individually; the keys name the
            per-class entries in the result.

    Returns:
        Dict with `accuracy`, `accuracy_balanced`, macro- and micro-averaged
        precision/recall/F1, plus `precision_<label>`, `recall_<label>` and
        `f1_<label>` for every label in `label2id`.
    """
    # overall metrics
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
        precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(y_true, y_pred, average='macro', zero_division=0.0)
        precision_micro, recall_micro, f1_micro, _ = precision_recall_fscore_support(y_true, y_pred, average='micro', zero_division=0.0)
        acc_balanced = balanced_accuracy_score(y_true, y_pred)
        acc_not_balanced = accuracy_score(y_true, y_pred)

    results = {
        'accuracy': acc_not_balanced,
        'accuracy_balanced': acc_balanced,
        'f1_macro': f1_macro,
        'precision_macro': precision_macro,
        'recall_macro': recall_macro,
        'f1_micro': f1_micro,
        'precision_micro': precision_micro,
        'recall_micro': recall_micro,
    }

    # by class metrics
    with warnings.catch_warnings():
        # without a filter this context manager is a no-op; silence warnings
        # here exactly like for the overall metrics above
        warnings.filterwarnings('ignore')
        precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average=None, labels=list(label2id.values()), zero_division=0.0)
    for name, p, r, f in zip(label2id, precision, recall, f1):
        results[f'precision_{name}'] = p
        results[f'recall_{name}'] = r
        results[f'f1_{name}'] = f

    return results

from sklearn.metrics import hamming_loss, accuracy_score, f1_score, label_ranking_loss

def parse_sequence_classifier_prediction_output_multilabel(p: PredictionOutput):
    """Turn multilabel logits into 0/1 predictions.

    Unpacks `p` into `(logits, labels)`, applies a sigmoid to the logits and
    thresholds the probabilities at 0.5.

    Returns:
        Tuple `(labels, y_pred)` with `y_pred` as an integer 0/1 array.
    """
    logits, labels = p
    probs = 1 / (1 + np.exp(-logits)) # Sigmoid
    y_pred = (probs >= 0.5).astype(int)
    return labels, y_pred

def compute_sequence_classification_metrics_multilabel(y_true, y_pred) -> Dict[str, float]:
    """
    Compute multilabel classification metrics from binary indicator labels.

    Returns a dict with the keys `hamming_loss`, `subset_accuracy`,
    `f1_macro` and `f1_micro`.

    **Interpretation**

    - *Hamming Loss*
        - Measures the fraction of labels that are incorrectly predicted (either a 0 instead of 1 or vice versa).
        - Lower is better; `0.0` means perfect prediction.
        - Formula: `(number of wrong labels) / (number of total labels)`
        - Good for understanding average label-wise error rate.

    - *Subset Accuracy (Exact Match Ratio)*
        - Fraction of examples where **all** labels are predicted correctly.
        - Very strict; requires the entire label set to be correct per sample.
        - Value ranges from `0.0` (no perfect predictions) to `1.0` (all perfect).
        - Not very forgiving if you're slightly wrong on multi-hot labels.

    - *F1-Macro*
        - Calculates F1 score **per label**, then takes the unweighted average.
        - Treats all labels equally regardless of how often they appear.
        - Sensitive to performance on rare labels.
        - Useful when class imbalance is a concern and all labels are important.

    - *F1-Micro*
        - Aggregates true positives, false positives, and false negatives across all labels before computing F1.
        - Gives more weight to frequent labels.
        - Better when the number of positive examples per label varies a lot.
        - Often higher than macro F1 in imbalanced datasets.

    - *Ranking Loss* (currently not computed, see the commented-out entry below)
        - Measures how often a **relevant label** is ranked lower than an irrelevant one.
        - Lower is better; `0.0` means perfect ranking.
        - Requires access to the **raw prediction scores** (before thresholding).
        - Useful in retrieval or recommendation scenarios where ranking quality matters.
    """
    return {
        "hamming_loss": hamming_loss(y_true, y_pred),
        "subset_accuracy": accuracy_score(y_true, y_pred),
        # zero_division=0.0 matches the other metric functions in this module
        # (same value as the default, but without the warning)
        "f1_macro": f1_score(y_true, y_pred, average="macro", zero_division=0.0),
        "f1_micro": f1_score(y_true, y_pred, average="micro", zero_division=0.0),
        # "ranking_loss": label_ranking_loss(y_true, probs)
    }


# token classification

def _correct_iob2(labels: List[str]) -> List[str]:
    """Repair IOB2 sequences in which a span starts with an `I-` tag.

    An `I-<type>` label at the first position or directly after an `O` label
    is rewritten to `B-<type>`. All other labels are returned unchanged.
    """
    corrected = []
    prev = 'O'  # treat the sequence start like a preceding 'O'
    for label in labels:
        if prev == 'O' and label.startswith('I-'):
            # only rewrite the prefix, never the entity type itself
            corrected.append('B-' + label[2:])
        else:
            corrected.append(label)
        prev = label
    return corrected

def parse_token_classifier_prediction_output(p: PredictionOutput):
    """Unpack `p` into `(predictions, labels)` and take the argmax over axis 2.

    Returns:
        Tuple `(labels, predictions)` with `predictions` as label IDs.
    """
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)
    return labels, predictions


def compute_token_classification_metrics(
        y_true: List[List[int]],
        y_pred: List[List[int]],
        label2id: Dict[str, int],
    ) -> Dict[str, float]:
    """Compute span-level (seqeval) precision, recall and F1.

    Args:
        y_true: Gold label IDs per sequence; positions with ID -100 are ignored.
        y_pred: Predicted label IDs per sequence, aligned with `y_true`.
        label2id: Mapping from IOB2 label (e.g. `'B-PER'`, `'O'`) to label ID.

    Returns:
        Flat dict with keys `<group>_<metric>`, where `<group>` is `macro`,
        `micro` or an entity type and `<metric>` is `precision`, `recall` or `f1`.
    """
    # decode by ID, not by position in the dict, so the result is also correct
    # when `label2id` is not ordered 0..n-1
    id2label = {i: l for l, i in label2id.items()}
    types = sorted({l[2:] for l in label2id if l != 'O'})

    # encode label IDs to labels
    predictions = [
        _correct_iob2([id2label[p] for (p, l) in zip(preds, labs) if l != -100])
        for preds, labs in zip(y_pred, y_true)
    ]
    labels = [
        _correct_iob2([id2label[l] for (_, l) in zip(preds, labs) if l != -100])
        for preds, labs in zip(y_pred, y_true)
    ]

    metrics = ['precision', 'recall', 'f1-score']
    keys = ['macro avg', 'micro avg'] + types

    # Span level (Seqeval)
    result = seqeval_classification_report(labels, predictions, output_dict=True, zero_division=0.0)
    # flatten
    result = {k: result[k] for k in keys if k in result}
    # format
    result = {
        # format: metric name <=> metric value
        str(f"{k.replace(' avg', '')}_{m.replace('f1-score', 'f1')}"): scores[m]
        # iterate over class-wise results
        for k, scores in result.items()
        # iterate over metrics
        for m in metrics
    }

    return result

# -------------------------------------------------------------------------------- /src/setfit_utils.py: --------------------------------------------------------------------------------
import torch
import torch.nn as nn
import numpy as np

from sentence_transformers import SentenceTransformer
from setfit import SetFitModel, SetFitHead

from numpy.typing import NDArray
from typing import List, Dict, Optional, Mapping, Union

def get_class_weights(x: NDArray, multitarget: bool=False) -> NDArray:
    """Compute normalized inverse-frequency class weights.

    Args:
        x: Label array. 1-d class labels if `multitarget=False`, 2-d 0/1
            indicator matrix (one column per label) if `multitarget=True`.
        multitarget: Whether `x` is a multi-target indicator matrix.

    Returns:
        Weights proportional to `total / count` per class, scaled to sum to 1.
    """
    if not multitarget: assert x.ndim == 1, 'if multitarget=False, x.ndim must be 1'
    if multitarget: assert x.ndim == 2, 'if multitarget=True, x.ndim must be 2'

    if multitarget:
        # assume that multitarget feature indicators can only be True/False, i.e., 0/1
        # NOTE(review): a column without any positive yields a division by zero
        w = x.sum()/x.sum(axis=0)
    else:
        _, cnts = np.unique(x, return_counts=True)
        w = sum(cnts)/cnts
    w /= w.sum()
    return w

class SetFitHeadWithClassWeights(SetFitHead):
    """
    A SetFit head that supports class-weights aware multi-class classification for end-to-end training.
    Binary classification is treated as 2-class classification.

    Args:
        in_features (`int`, *optional*):
            The embedding dimension from the output of the SetFit body. If `None`, defaults to `LazyLinear`.
        out_features (`int`, defaults to `2`):
            The number of targets. If set `out_features` to 1 for binary classification, it will be changed to 2 as 2-class classification.
        temperature (`float`, defaults to `1.0`):
            A logits' scaling factor. Higher values make the model less confident and lower values make
            it more confident.
        eps (`float`, defaults to `1e-5`):
            A value for numerical stability when scaling logits.
        bias (`bool`, *optional*, defaults to `True`):
            Whether to add bias to the head.
        device (`torch.device`, str, *optional*):
            The device the model will be sent to. If `None`, will check whether GPU is available.
        multitarget (`bool`, defaults to `False`):
            Enable multi-target classification by making `out_features` binary predictions instead
            of a single multinomial prediction.
        class_weights (`List[float]`, `numpy.typing.NDarray`, *optional*):
            One weight per output. Passed as `weight` to the cross-entropy loss, or as `pos_weight`
            to the binary cross-entropy loss if `multitarget=True`. If `None`, the losses are unweighted.
    """

    def __init__(
        self,
        class_weights: Optional[Union[List[float], NDArray]] = None,
        **kwargs
    ) -> None:
        super(SetFitHeadWithClassWeights, self).__init__(**kwargs)

        if class_weights is None:
            # the signature allows None: fall back to unweighted losses
            # instead of failing on `len(None)`
            self.class_weights = None
            return

        if len(class_weights) != self.out_features:
            raise ValueError('length of `class_weights` must be same as `out_features`')

        self.class_weights = torch.tensor(class_weights, dtype=self.linear.weight.dtype).to(self._device)


    def get_loss_fn(self) -> nn.Module:
        """Return the (class-weighted) loss module matching the head type."""
        if self.multitarget: # if sigmoid output
            return nn.BCEWithLogitsLoss(pos_weight=self.class_weights)
        return nn.CrossEntropyLoss(weight=self.class_weights)

    @property
    def device(self) -> torch.device:
        """
        `torch.device`: The device on which the model is placed.

        Reference from: https://github.com/UKPLab/sentence-transformers/blob/master/sentence_transformers/SentenceTransformer.py#L869
        """
        return next(self.parameters()).device

    def to(self, device: Union[str, torch.device]) -> "SetFitHeadWithClassWeights":
        """Move this SetFitHeadWithClassWeights to `device`, and then return `self`. This method does not copy.

        Args:
            device (Union[str, torch.device]): The identifier of the device to move the model to.

        Returns:
            SetFitHeadWithClassWeights: Returns the original model, but now on the desired device.
        """
        self.linear = self.linear.to(device)
        # the attribute may not exist yet (call before `__init__` finished) or be None
        if getattr(self, "class_weights", None) is not None:
            self.class_weights = self.class_weights.to(device)
        return self

    def get_config_dict(self) -> Dict[str, Optional[Union[int, float, bool, List[float]]]]:
        """Return the head's configuration; class weights are rounded to 3 decimals."""
        weights = self.class_weights
        return {
            "in_features": self.in_features,
            "out_features": self.out_features,
            "temperature": self.temperature,
            "bias": self.bias,
            "device": self.device.type,
            "multitarget": self.multitarget,
            "class_weights": None if weights is None else weights.cpu().numpy().round(3).tolist()
        }

    def __repr__(self) -> str:
        return "SetFitHeadWithClassWeights({})".format(self.get_config_dict())

def get_device() -> str:
    """Return `'cuda'` if available, else `'mps'` if available, else `'cpu'`."""
    if torch.cuda.is_available():
        return 'cuda'
    if torch.backends.mps.is_available():
        return 'mps'
    return 'cpu'

def model_init(
        model_name: str,
        id2label: Mapping[int, str],
        multitarget_strategy: Optional[str]=None,
        class_weights: Optional[NDArray]=None,
        device: Optional[Union[str, torch.device]]=None
    ) -> "SetFitModel":
    """Build a SetFit model with a differentiable head, optionally class-weighted.

    Args:
        model_name: Name or path of the sentence-transformers body.
        id2label: Mapping from label ID to label name; its size sets the number of outputs.
        multitarget_strategy: If a string, the head is created with `multitarget=True`
            and the value is passed on to `SetFitModel`.
        class_weights: If given, a `SetFitHeadWithClassWeights` is used as head.
        device: Target device; defaults to the result of `get_device()`.

    Returns:
        The assembled `SetFitModel`, moved to `device`.
    """
    if class_weights is not None:
        if multitarget_strategy is None:
            assert len(id2label) == len(class_weights), 'len(id2label) must equal len(class_weights)'

    if device is None:
        device = get_device()

    # body and head are created on the CPU; the assembled model is moved to `device` at the end
    body = SentenceTransformer(model_name, device='cpu')

    head_kwargs = dict(
        in_features=body.get_sentence_embedding_dimension(),
        out_features=len(id2label),
        device='cpu',
        multitarget=isinstance(multitarget_strategy, str),
    )
    if class_weights is not None:
        head_kwargs['class_weights'] = class_weights
        head = SetFitHeadWithClassWeights(**head_kwargs)
    else:
        head = SetFitHead(**head_kwargs)

    return SetFitModel(
        model_head=head,
        model_body=body,
        multitarget_strategy=multitarget_strategy,
        labels=list(id2label.values()),
        id2label=id2label
    ).to(device)
-------------------------------------------------------------------------------- /src/topic_modeling.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from bertopic import BERTopic 5 | 6 | # import gensim.corpora as corpora 7 | # from gensim.models.coherencemodel import CoherenceModel 8 | 9 | from sklearn.metrics import ( 10 | silhouette_score, # <== compute overall, corpus-level score 11 | silhouette_samples # <== compute sample/document-level scores 12 | ) 13 | # import matplotlib.pyplot as plt 14 | 15 | # from typing import Union, List, Literal, Tuple, Dict 16 | 17 | # def compute_coherece( 18 | # model: BERTopic, 19 | # docs: Union[pd.Series, List[str]], 20 | # coherence_metric: Literal['u_mass', 'c_v', 'c_uci', 'c_npmi']='c_v', 21 | # ) -> Tuple[Dict[str, Union[float, Dict[int, float]]], CoherenceModel]: 22 | # """ 23 | # Compute coherence scores for a BERTopic model. 24 | 25 | # Parameters: 26 | # model (BERTopic): The BERTopic model. 27 | # docs (Union[pd.Series, List[str]]): The documents to compute coherence on. 28 | # Must be a pandas Series or a list of strings. 29 | # coherence_metric (Literal['u_mass', 'c_v', 'c_uci', 'c_npmi'], optional): The coherence metric to use. 30 | # Allowed values are 'u_mass', 'c_v', 'c_uci', 'c_npmi' (see https://radimrehurek.com/gensim/models/coherencemodel.html) 31 | # Defaults to 'c_v'. 32 | 33 | # Returns: 34 | # Tuple[Dict[str, Union[float, Dict[int, float]]], CoherenceModel]: 35 | # A tuple containing the coherence scores and the coherence model. 36 | # - scores (Dict[str, Union[float, Dict[int, float]]]): The coherence scores. 37 | # - overall (float): The overall coherence score. 38 | # - by_topic (Dict[int, float]): The coherence score for each topic. 39 | # - coherence_model (CoherenceModel): The coherence model object. 
40 | 41 | # """ 42 | # # get topic top-n words 43 | # topic_words = [ 44 | # [word for word, _ in words if len(word) > 0] # for top-n words words in topic 45 | # for tid, words in model.topic_representations_.items() # iterate over topics 46 | # if tid > -1 # exclude outlier topic 47 | # ] 48 | # topic_words = [topic for topic in topic_words if len(topic)>0] 49 | 50 | # # extract vectorizer and analyzer from BERTopic 51 | # vectorizer = model.vectorizer_model 52 | # analyzer = vectorizer.build_analyzer() 53 | 54 | # if isinstance(docs, list): 55 | # docs = np.array(docs) 56 | # cleaned_docs = model._preprocess_text(docs) 57 | # toks = [analyzer(doc) for doc in cleaned_docs] 58 | 59 | # # get topics 60 | # topics = model.topics_ 61 | 62 | # # pre-process documents 63 | # documents = pd.DataFrame({"Document": toks, "ID": range(len(docs)), "Topic": topics}) 64 | # documents_per_topic = documents.groupby(['Topic'], as_index=False).agg({'Document': 'sum'}) 65 | 66 | # # extract features for Topic Coherence evaluation 67 | # # words = vectorizer.get_feature_names_out() 68 | # tokens = documents_per_topic.Document.to_list() 69 | # dictionary = corpora.Dictionary(tokens) 70 | # corpus = [dictionary.doc2bow(token) for token in tokens] 71 | 72 | # # compile coherence model 73 | # coherence_model = CoherenceModel( 74 | # topics=topic_words, 75 | # texts=tokens, 76 | # corpus=corpus, 77 | # dictionary=dictionary, 78 | # coherence=coherence_metric 79 | # ) 80 | 81 | # # evaluate coherence 82 | # scores = { 83 | # 'overall': coherence_model.get_coherence(), 84 | # 'by_topic': {tid: c for tid, c in enumerate(coherence_model.get_coherence_per_topic())} 85 | # } 86 | 87 | # return scores, coherence_model 88 | 89 | # def plot_topic_coherence_scores(scores, add_overall=True, figsize=(7, 5)): 90 | # coherences_df = pd.DataFrame( 91 | # scores['by_topic'].values(), 92 | # index=range(len(scores['by_topic'])), 93 | # columns=['coherence'] 94 | # ) 95 | # # create new plot 96 | # 
#     plt.figure(figsize=figsize)
#     coherences_df.sort_values(by='coherence', inplace=True)
#     coherences_df['coherence'].plot(kind='barh')
#     if add_overall:
#         # draw a vertical line at the overall coherence score
#         plt.axvline(scores['overall'], color='red', linestyle='--')
#     plt.xlim(0, 1)
#     plt.show()

def compute_silhouette_scores(model: BERTopic, remove_outliers: bool=True, seed: int=42):
    """
    Compute silhouette scores for a BERTopic model.

    The scores are computed on the model's fitted UMAP embedding
    (`model.umap_model.embedding_`) with the assigned topics as cluster labels.

    Parameters:
        model (BERTopic): The BERTopic model.
        remove_outliers (bool, optional): Whether to remove outliers before computing silhouette scores.
            Defaults to True.
        seed (int, optional): The random seed. Defaults to 42.

    Returns:

        Dict[str, Union[float, pd.DataFrame]]: A dictionary containing the silhouette scores.
            - overall (float): The overall silhouette score.
            - by_topic (pd.DataFrame): The silhouette scores by topic.
                - topic (int): The topic id.
                - mean (float): The mean silhouette score.
                - std (float): The standard deviation of the silhouette scores.

    """
    if remove_outliers:
        # documents assigned to topic -1 are BERTopic's outliers
        idxs = np.where(np.array(model.topics_) > -1)[0]
        embeddings = model.umap_model.embedding_[idxs, :]
        topics = np.array(model.topics_)[idxs].tolist()
    else:
        embeddings = model.umap_model.embedding_
        topics = model.topics_

    overall = silhouette_score(
        X=embeddings,
        labels=topics,
        sample_size=None,
        random_state=seed
    )
    by_topic = silhouette_samples(X=embeddings, labels=topics)
    by_topic = pd.DataFrame({'topic': topics, 'silhouette_score': by_topic})
    by_topic = by_topic.groupby('topic').agg(['mean', 'std'])
    # remove stacked columns
    by_topic.columns = by_topic.columns.droplevel(0)
    by_topic.reset_index(inplace=True)
    out = {
        'overall': overall,
        'by_topic': by_topic
    }
    return out

# -------------------------------------------------------------------------------- /src/utils/io.py: --------------------------------------------------------------------------------
import os
import json
import pandas as pd

from typing import List, Union, Optional

def _is_file(path: str) -> bool:
    """Return True if `path` exists and is a regular file."""
    return os.path.exists(path) and os.path.isfile(path)

def _is_dir(path: str) -> bool:
    """Return True if `path` exists and is a directory."""
    return os.path.exists(path) and os.path.isdir(path)

def _get_col_separator(path: str) -> Union[str, None]:
    """Infer the column separator from the file extension (None if unsupported)."""
    sep = None
    if path.endswith('.csv'):
        sep = ','
    elif path.endswith('.tsv') or path.endswith('.tab'):
        sep = '\t'
    return sep

def read_tabular(path: str, columns: Optional[List[str]]=None, **kwargs) -> 'pd.DataFrame':
    """Read a .csv, .tsv or .tab file into a data frame.

    Args:
        path: Path to the file; the extension determines the separator.
        columns: If given, these columns must exist and only they are returned.
        **kwargs: Passed on to `pandas.read_csv`.

    Raises:
        FileNotFoundError: If `path` is not an existing file.
        ValueError: If the file extension is not supported.
        AssertionError: If a requested column is missing.
    """
    if not _is_file(str(path)):
        raise FileNotFoundError(f'File not found: {path}')

    sep = _get_col_separator(str(path))
    if sep is None:
        raise ValueError('Unsupported file format. `path` Must be a .tsv, .tab, or .csv file.')

    df = pd.read_csv(path, sep=sep, **kwargs)
    if columns is not None:
        for c in columns:
            assert c in df.columns, f'Column {c} not found in the dataframe.'
        df = df[columns]
    return df

def write_jsonlines(data: List[dict], path: str, **kwargs):
    """Write `data` to a new JSON lines file (one JSON object per line).

    Args:
        data: The records to write.
        path: Destination file; must not exist yet.
        **kwargs: Accepted for backward compatibility; currently not used.

    Raises:
        FileNotFoundError: If the target directory does not exist.
        FileExistsError: If `path` already exists.
    """
    # a bare file name has an empty dirname, which refers to the current directory
    parent = os.path.dirname(path) or '.'
    if not _is_dir(parent):
        raise FileNotFoundError(f'Directory not found: {parent}')
    if _is_file(path):
        raise FileExistsError(f'File already exists: {path}')

    with open(path, 'w', encoding='utf-8') as f:
        for record in data:
            f.write(json.dumps(record) + '\n')

def read_jsonlines(path: str) -> List[dict]:
    """Read a JSON lines file into a list of dicts, skipping blank lines.

    Raises:
        FileNotFoundError: If `path` is not an existing file.
    """
    if not _is_file(path):
        raise FileNotFoundError(f'File not found: {path}')

    with open(path, 'r', encoding='utf-8') as f:
        data = [json.loads(l) for l in f if l.strip()]
    return data

# -------------------------------------------------------------------------------- /src/utils/token_counters.py: --------------------------------------------------------------------------------
"""
This module contains classes for counting tokens with the tokenizers of different providers
(Hugging Face tokenizers and OpenAI's tiktoken encodings).
The classes provide a consistent interface (`count_tokens()` or calling the instance),
so they can be used as drop-in replacements for each other.
"""

import os

import numpy as np

import json
from json import JSONDecodeError

from transformers import AutoTokenizer
from transformers.utils.hub import GatedRepoError
from huggingface_hub.utils import HfHubHTTPError as HTTPError

from transformers.utils import logging
logging.get_logger("transformers").setLevel(logging.ERROR)
import tiktoken

from tqdm.auto import tqdm

from dataclasses import dataclass
from typing import Any, Dict, List, Union, Optional

@dataclass
class _TokenCounterBase:
    """Common interface of all token counters; subclasses implement `count_tokens`."""

    def count_tokens(self, input: Union[str, List[str]]) -> Union[int, List[int]]:
        """
        Count the number of tokens in the input.

        Args:
            input (Union[str, List[str]]): The input to tokenize. Can be a string or a list of strings.

        Returns:
            Union[int, List[int]]: The number of tokens in the input. If the input is a list, returns a list of token counts.
        """
        # fail loudly instead of silently returning None from the base class
        raise NotImplementedError('Subclasses must implement `count_tokens`.')

    def __call__(self, input: Union[str, List[str]]) -> Union[int, List[int]]:
        """
        Call the tokenizer on the input. This is equivalent to calling count_tokens.

        Args:
            input (Union[str, List[str]]): The input to tokenize. Can be a string or a list of strings.

        Returns:
            Union[int, List[int]]: The number of tokens in the input. If the input is a list, returns a list of token counts.
        """
        return self.count_tokens(input)


@dataclass
class HFTokenCounter(_TokenCounterBase):
    """Token counter backed by a Hugging Face tokenizer loaded with `AutoTokenizer`."""
    # name or path passed to `AutoTokenizer.from_pretrained`
    tokenizer_name: str

    def __post_init__(self):
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name)
        except (OSError, GatedRepoError, HTTPError) as e:
            # every fragment that interpolates the name must be an f-string;
            # chain the original error so the actual cause stays visible
            raise Exception((
                f"You don't have access to model {self.tokenizer_name}. "
                f"Go to https://huggingface.co/{self.tokenizer_name} to request access. "
                "Then try instantiating the model/embedder again."
            )) from e

    def count_tokens(self, input: Union[str, List[str]]) -> Union[int, List[int]]:
        """Count tokens per text; a single string yields a single int."""
        if (is_str := isinstance(input, str)):
            input = [input]
        toks = self.tokenizer(input, truncation=False, return_length=True)['length']
        if is_str:
            toks = toks[0]
        return toks

class OpenAITokenCounter(_TokenCounterBase):
    """Token counter backed by a tiktoken encoding."""

    def __init__(self, encoding_name: Union[str, None] = None, model: Union[str, None] = None):
        """
        Initialize the tokenizer with either a model or an encoding name.

        Args:
            encoding_name (Union[str, None]): The name of the encoding to use. Default is None.
            model (Union[str, None]): The model to use for encoding. Default is None.

        Raises:
            ValueError: If neither model nor encoding_name is provided.
            ValueError: If both model and encoding_name are provided.
        """
        # ensure that either model or encoding_name is provided
        if model is None and encoding_name is None:
            raise ValueError("Either `model` or `encoding_name` must be provided.")
        if model is not None and encoding_name is not None:
            raise ValueError("Only one of `model` or `encoding_name` can be provided.")
        # test against None (like the checks above) so that an empty
        # `encoding_name` is not routed to `encoding_for_model(None)`
        if encoding_name is not None:
            self.encoding = tiktoken.get_encoding(encoding_name)
        else:
            self.encoding = tiktoken.encoding_for_model(model)

    def count_tokens(self, input: Union[str, List[str]]) -> Union[int, List[int]]:
        """Count tokens per text; a single string yields a single int."""
        if isinstance(input, str):
            return len(self.encoding.encode(input))
        else:
            toks = self.encoding.encode_batch(input)
            return [len(t) for t in toks]

# --------------------------------------------------------------------------------