├── figures
    ├── collab-growth.pdf
    ├── field-counts.pdf
    ├── yearly-growth.pdf
    ├── collab-growth2.pdf
    ├── yearly-growth-else.pdf
    ├── field-counts-sans-cs.pdf
    ├── yearly-growth-medicine.pdf
    ├── percentage-cited-by-non-XAI.pdf
    ├── citation-entropy-Computer Science.pdf
    └── percentage-cited-by-Computer Science.pdf
├── README.md
└── Retrieval.ipynb


/figures/collab-growth.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alonjacovi/XAI-Scholar/HEAD/figures/collab-growth.pdf


--------------------------------------------------------------------------------
/figures/field-counts.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alonjacovi/XAI-Scholar/HEAD/figures/field-counts.pdf


--------------------------------------------------------------------------------
/figures/yearly-growth.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alonjacovi/XAI-Scholar/HEAD/figures/yearly-growth.pdf


--------------------------------------------------------------------------------
/figures/collab-growth2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alonjacovi/XAI-Scholar/HEAD/figures/collab-growth2.pdf


--------------------------------------------------------------------------------
/figures/yearly-growth-else.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alonjacovi/XAI-Scholar/HEAD/figures/yearly-growth-else.pdf


--------------------------------------------------------------------------------
/figures/field-counts-sans-cs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alonjacovi/XAI-Scholar/HEAD/figures/field-counts-sans-cs.pdf


--------------------------------------------------------------------------------
/figures/yearly-growth-medicine.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alonjacovi/XAI-Scholar/HEAD/figures/yearly-growth-medicine.pdf


--------------------------------------------------------------------------------
/figures/percentage-cited-by-non-XAI.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alonjacovi/XAI-Scholar/HEAD/figures/percentage-cited-by-non-XAI.pdf


--------------------------------------------------------------------------------
/figures/citation-entropy-Computer Science.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alonjacovi/XAI-Scholar/HEAD/figures/citation-entropy-Computer Science.pdf


--------------------------------------------------------------------------------
/figures/percentage-cited-by-Computer Science.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alonjacovi/XAI-Scholar/HEAD/figures/percentage-cited-by-Computer Science.pdf


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # XAI-Scholar
 2 | 
 3 | Code and data for deriving empirical trends in XAI.
 4 | 
 5 | ## Sources
 6 | Please check the accompanying [Medium blog post](https://medium.com/@alonjacovi/trends-in-explainable-ai-xai-literature-a1db485e871) and [arXiv report](http://arxiv.org/abs/2301.05433).
 7 | 
 8 | The data was collected as described in the above links. To recap, it is a mix of keyword search via SemanticScholar and manual curation. The final collection has 5199 papers as of December 31st, 2022.
 9 | 
10 | The code used to interface with SemanticScholar uses the unofficial [semanticscholar](https://github.com/danielnsilva/semanticscholar) Python library. Install with `pip install semanticscholar`. The plots and graphs use [Seaborn](https://seaborn.pydata.org/) and [GraphOnline](http://graphonline.ru/en/).
11 | 
12 | ## Format
13 | `XAI-Scholar_analysis.ipynb` is a Jupyter Notebook with the code necessary to reproduce the results in the Medium/arXiv reports.
14 | 
15 | `xai-scholar.json` is a json dictionary with 5199 SemanticScholar `paperId` keys.
16 | 
17 | Each paper has the standard fields given by the SemanticScholar API:
18 | * `paperId`
19 | * `externalIds`
20 | * `url`
21 | * `title`
22 | * `abstract`
23 | * `venue`
24 | * `year`
25 | * `referenceCount`
26 | * `citationCount`
27 | * `influentialCitationCount`
28 | * `isOpenAccess`
29 | * `fieldsOfStudy`
30 | * `s2FieldsOfStudy`
31 | * `tldr`
32 | * `publicationTypes`
33 | * `publicationDate`
34 | * `journal`
35 | * `authors`
36 | 
37 | Note: Some of the papers are missing some of the fields, or they are marked as empty or `None`. 
38 | 
39 | The following fields are missing in `xai-scholar.json` and need to be retrieved from SemanticScholar due to their large size (expect around 1 GB without `embedding`):
40 | * `embedding`
41 | * `citations`
42 | * `references`
43 | 
44 | The code to do so is in the Jupyter notebook; it simply calls `SemanticScholar().get_paper(paperId)` for each `paperId`.
45 | 


--------------------------------------------------------------------------------
/Retrieval.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "12dc819c",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "from semanticscholar import SemanticScholar\n",
 11 |     "import pickle as pkl\n",
 12 |     "import csv\n",
 13 |     "import json\n",
 14 |     "from difflib import SequenceMatcher as sm\n",
 15 |     "from tqdm.notebook import tqdm\n",
 16 |     "import pandas as pd\n",
 17 |     "from collections import Counter\n",
 18 |     "\n",
 19 |     "from IPython.display import display, HTML\n",
 20 |     "display(HTML(\"<style>div.output_area pre {white-space: pre;}</style>\"))\n"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": null,
 26 |    "id": "0deb1265",
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "ss = SemanticScholar()  # API client created with default settings (no arguments passed)\n",
 31 |     "fields = ['title', 'abstract', 'year', 'venue', 'publicationVenue', 'externalIds', 'url',\n",
 32 |     "          'journal', 'referenceCount', 'citationCount', 'influentialCitationCount',\n",
 33 |     "          'fieldsOfStudy', 'authors', 's2FieldsOfStudy', 'publicationTypes']  # passed as fields= to ss.search_paper()"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": null,
 39 |    "id": "a27cb0e5",
 40 |    "metadata": {},
 41 |    "outputs": [],
 42 |    "source": [
 43 |     "# Retrieving papers from SS with keyword queries\n",
 44 |     "# A result is kept only if its padded, lower-cased title+abstract contains >= 2 keywords and no banned term.\n",
 45 |     "keywords_xai = [\n",
 46 |     "    \" xai \", \"(xai)\", \"hcxai\", \"explainability\", \n",
 47 |     "    \"interpretability\", \n",
 48 |     "    \"explainable ai\",\n",
 49 |     "    \"explainable artificial intelligence\",\n",
 50 |     "    \"interpretable ml\", \"interpretable machine learning\", \"interpretable model\",\n",
 51 |     "    \"feature attribution\", \"feature importance\", \"global explanation\", \"local explanation\",\n",
 52 |     "    \"local interpretation\", \"global interpretation\",\n",
 53 |     "    \"model explanation\", \"model interpretation\", \"saliency\", \"counterfactual explanation\"]\n",
 54 |     "\n",
 55 |     "banned = [\"/xai/xai\", \"xai-xai\", \"xai xai\", \"workshop\", \"proceedings\"]\n",
 56 |     "\n",
 57 |     "papers = {}  # paperId -> paper dict (with any 'embedding' entry removed)\n",
 58 |     "\n",
 59 |     "for query in keywords_xai:\n",
 60 |     "    print(\"Retrieving papers with query:\", query)\n",
 61 |     "\n",
 62 |     "    res = ss.search_paper(query, fields=fields)\n",
 63 |     "    \n",
 64 |     "    for i,x in tqdm(enumerate(res)):\n",
 65 |     "        title_lower = f\" {(x.title or '').lower()} {(x.abstract or '').lower()} \"\n",
 66 |     "        \n",
 67 |     "        count = 0\n",
 68 |     "        for keyword in keywords_xai:\n",
 69 |     "            if keyword in title_lower:\n",
 70 |     "                count += 1\n",
 71 |     "        for keyword in banned:\n",
 72 |     "            if keyword in title_lower:\n",
 73 |     "                count = 0\n",
 74 |     "        \n",
 75 |     "        if count < 2:\n",
 76 |     "            continue\n",
 77 |     "\n",
 78 |     "        # Read the id before converting: a plain dict has no .paperId attribute.\n",
 79 |     "        paper_id = x.paperId\n",
 80 |     "        x = dict(x)\n",
 81 |     "        x.pop('embedding', None)\n",
 82 |     "\n",
 83 |     "        papers.setdefault(paper_id, x)  # keep the first copy seen for each id\n",
 84 |     "            \n",
 85 |     "    print(\"# papers:\", len(papers))"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": null,
 91 |    "id": "c6ddbb24",
 92 |    "metadata": {},
 93 |    "outputs": [],
 94 |    "source": [
 95 |     "# This cell performs one round of citation expansion based on a set of seed papers\n",
 96 |     "# For the XAI-Scholar dataset, I performed continuous expansion runs until an empty round (no new papers found)\n",
 97 |     "\n",
 98 |     "citation_expansion_seed = papers\n",
 99 |     "\n",
100 |     "expanded_papers = {}\n",
101 |     "\n",
102 |     "for pid in tqdm(citation_expansion_seed):\n",
103 |     "    # The paper dicts returned by \"search_paper()\" don't include references and citations,\n",
104 |     "    # so we need to retrieve them separately.\n",
105 |     "    full_paper = dict(ss.get_paper(pid))\n",
106 |     "    expanded_papers[pid] = full_paper\n",
107 |     "    \n",
108 |     "    if 'references' in full_paper:\n",
109 |     "        refs = full_paper['references']\n",
110 |     "    else:\n",
111 |     "        refs = []\n",
112 |     "        \n",
113 |     "    if 'citations' in full_paper:\n",
114 |     "        cites = full_paper['citations']\n",
115 |     "    else:\n",
116 |     "        cites = []\n",
117 |     "        \n",
118 |     "    for paper in refs + cites:\n",
119 |     "        pid2 = paper['paperId']\n",
120 |     "        \n",
121 |     "        if pid2 in expanded_papers or pid2 in citation_expansion_seed:\n",
122 |     "            continue\n",
123 |     "    \n",
124 |     "        title = paper['title']\n",
125 |     "        abstract = paper['abstract'] if paper['abstract'] else ''\n",
126 |     "        title_lower = f\" {title.lower()} {abstract.lower() if x['abstract'] else ''} \"\n",
127 |     "        \n",
128 |     "        count = 0\n",
129 |     "        for keyword in keywords_xai:\n",
130 |     "            if keyword in title_lower:\n",
131 |     "                count += 1\n",
132 |     "        for keyword in banned:\n",
133 |     "            if keyword in title_lower:\n",
134 |     "                count = 0\n",
135 |     "\n",
136 |     "        if count < 2:\n",
137 |     "            continue\n",
138 |     "        \n",
139 |     "        \n",
140 |     "        x = dict(ss.get_paper(pid2))\n",
141 |     "        \n",
142 |     "        if 'embedding' in x:\n",
143 |     "            del x['embedding']\n",
144 |     "        \n",
145 |     "        expanded_papers[pid2] = paper\n",
146 |     "        print(\"Hey :)\")"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": null,
152 |    "id": "b3269246",
153 |    "metadata": {},
154 |    "outputs": [],
155 |    "source": []
156 |   }
157 |  ],
158 |  "metadata": {
159 |   "kernelspec": {
160 |    "display_name": "Python 3",
161 |    "language": "python",
162 |    "name": "python3"
163 |   },
164 |   "language_info": {
165 |    "codemirror_mode": {
166 |     "name": "ipython",
167 |     "version": 3
168 |    },
169 |    "file_extension": ".py",
170 |    "mimetype": "text/x-python",
171 |    "name": "python",
172 |    "nbconvert_exporter": "python",
173 |    "pygments_lexer": "ipython3",
174 |    "version": "3.8.15"
175 |   }
176 |  },
177 |  "nbformat": 4,
178 |  "nbformat_minor": 5
179 | }
180 | 


--------------------------------------------------------------------------------