├── .coveragerc
├── .github
    └── workflows
    │   └── test.yml
├── .gitignore
├── README.md
├── assets
    └── demo.gif
├── pyproject.toml
├── src
    ├── __init__.py
    ├── home.py
    ├── pages
    │   ├── 1_1️⃣_search.py
    │   ├── 2_2️⃣_study_selection.py
    │   ├── 3_3️⃣_results.py
    │   └── md
    │   │   └── home.md
    ├── tests
    │   └── __init__.py
    └── utils
    │   ├── __init__.py
    │   ├── consts.py
    │   ├── search_engine.py
    │   └── site_config.py
└── tests
    ├── __init__.py
    └── test_main.py


/.coveragerc:
--------------------------------------------------------------------------------
 1 | # .coveragerc to control coverage.py
 2 | [run]
 3 | omit =
 4 |     */site-packages/*
 5 |     */distutils/*
 6 |     tests/*
 7 |     /usr/*
 8 | 
 9 | [html]
10 | directory = htmlcov
11 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 | 
 8 | jobs:
 9 |   test:
10 |     runs-on: ubuntu-latest
11 |     strategy:
12 |       matrix:
13 |         python-version: [3.8]
14 | 
15 |     steps:
16 |       #----------------------------------------------
17 |       #       check-out repo and set-up python
18 |       #----------------------------------------------
19 |       - name: Check out repository
20 |         uses: actions/checkout@v2
21 |       - name: Set up python
22 |         uses: actions/setup-python@v2
23 |         with:
24 |           python-version: 3.8
25 |       #----------------------------------------------
26 |       #  -----  install & configure poetry  -----
27 |       #----------------------------------------------
28 | 
29 |       - name: Install Poetry
30 |         uses: snok/install-poetry@v1.1.1
31 |         with:
32 |           virtualenvs-create: true
33 |           virtualenvs-in-project: true
34 | 
35 |       #----------------------------------------------
36 |       #       load cached venv if cache exists
37 |       #----------------------------------------------
38 |       - name: Load cached venv
39 |         id: cached-poetry-dependencies
40 |         uses: actions/cache@v2
41 |         with:
42 |           path: .venv
43 |           key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}
44 | 
45 |       #----------------------------------------------
46 |       # install dependencies if cache does not exist
47 |       #----------------------------------------------
48 |       - name: Install dependencies
49 |         run: |
50 |           poetry install
51 |         if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
52 |       #----------------------------------------------
53 |       #              run test suite
54 |       #----------------------------------------------
55 |       - name: Run pytest
56 |         run: PYTHONPATH=src/ poetry run python -m coverage run -m pytest
57 | 
58 |       - name: Run Coverage
59 |         run: PYTHONPATH=src/ poetry run python -m coverage report -m;
60 | 
61 |       - name: Generate html Report
62 |         run: PYTHONPATH=src/ poetry run python -m coverage html
63 |       #----------------------------------------------
64 |       #              coverage badge
65 |       #----------------------------------------------
66 |       - uses: codecov/codecov-action@v2
67 |         with:
68 |           token: ${{ secrets.CODECOV_TOKEN }}
69 |           fail_ci_if_error: true
70 |           verbose: true
71 | 
72 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled
 2 | __pycache__/
 3 | 
 4 | # pipenv
 5 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 6 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 7 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 8 | #   install all needed dependencies.
 9 | poetry.lock
10 | 
11 | # Unit test / coverage reports
12 | .pytest_cache/
13 | */tests/data
14 | 
15 | # docker
16 | Dockerfile
17 | 
18 | # shell scripts
19 | *.sh
20 | 
21 | # vscode
22 | .vscode
23 | 
24 | # notes
25 | notes.md
26 | 
27 | # coverage
28 | .coverage
29 | htmlcov/


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Automated search across multiple databases and preprint servers to save your time 
 2 | during structured literature search and review.
 3 | 
 4 | # Features
 5 | * Coherent structure across multiple databases and preprint servers
 6 | * Cross-reference search based on the references of your findings
 7 | * Export to advanced structured literature engines such as Rayyan or [cadima](https://www.cadima.info/)
 8 | * Excludes most duplicates across databases
 9 | * Manual selection based on publication details (see PRISMA)
10 | 
11 | # Demo
12 | 
13 | ![example image](https://github.com/ChristianGerloff/set-you-free/blob/feature/findpapers-integration/assets/demo.gif)
14 | 
15 | 
16 | # Requirements
17 | * python >= 3.8
18 | * poetry
19 | 
20 | # How to start
21 | 
22 | Navigate to the repo folder and start poetry shell
23 | ```
24 | poetry shell
25 | ```
26 | 
27 | Install the dependencies
28 | ```
29 | poetry install
30 | ```
31 | 
32 | Start the application via streamlit
33 | ```
34 | streamlit run src/home.py
35 | ```
36 | 
37 | ## Authors
38 | 
39 | Christian Gerloff, Leon Lotter, Kashyap Maheshwari
40 | 
41 | ## How to cite
42 | If you use `Set You Free` please cite (see Zenodo):
43 | 
44 | 
45 | Gerloff C., Lotter L., & Maheshwari K. (2020). Set You Free: Automated Structured Literature Search.


--------------------------------------------------------------------------------
/assets/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChristianGerloff/set-you-free/f0752d468619456f97c83ee1d2456dfa4447f2b4/assets/demo.gif


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "set-you-free"
 3 | version = "0.1.0"
 4 | description = "Structured Literature Search "
 5 | authors = ["Christian Gerloff <christian.gerloff@rwth-aachen.de>"]
 6 | 
 7 | [tool.poetry.dependencies]
 8 | python = ">=3.8.0,<4.0.0"
 9 | streamlit = "^1.11.0"
10 | findpapers = {git = "https://github.com/ChristianGerloff/findpapers.git", branch = "develop"}
11 | graphviz = "^0.20.1"
12 | matplotlib-venn = "^0.11.7"
13 | streamlit-aggrid = "^0.2.3"
14 | rispy = "^0.7.1"
15 | 
16 | [build-system]
17 | requires = ["poetry-core>=1.0.0"]
18 | build-backend = "poetry.core.masonry.api"
19 | 


--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChristianGerloff/set-you-free/f0752d468619456f97c83ee1d2456dfa4447f2b4/src/__init__.py


--------------------------------------------------------------------------------
/src/home.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | from pathlib import Path
 4 | from utils.site_config import set_page_title, set_page_style
 5 | 
 6 | @st.cache
 7 | def get_home_md() -> str:
 8 |     """Returns home
 9 | 
10 |     Returns:
11 |         str -- The home as a string of MarkDown
12 |     """
13 |     url = Path(__file__).resolve().parent / "pages" / "md" / "home.md"
14 |     with open(url, mode="r") as file:
15 |         readme_md_contents = "".join(file.readlines())
16 |     return readme_md_contents.split("\n", 3)[-1]
17 | 
# page configuration (default title)
set_page_title()

# sidebar header
st.sidebar.markdown("# Home 🎈")

# main area: title followed by the markdown body
with st.spinner("Loading Home ..."):
    st.title("Set You Free")
    st.markdown(get_home_md())
29 | 


--------------------------------------------------------------------------------
/src/pages/1_1️⃣_search.py:
--------------------------------------------------------------------------------
  1 | import datetime
  2 | import findpapers as fp
  3 | import streamlit as st
  4 | import utils.consts as cs
  5 | 
  6 | from utils.site_config import set_page_title
  7 | from utils.search_engine import build_search_str, single_search_str, get_search_str
  8 | from utils.search_engine import set_build_btns, set_single_btns
  9 | from utils.search_engine import convert_search_to_json
 10 | 
 11 | # configure page
 12 | set_page_title("Literature Search")
 13 | 
 14 | 
 15 | # sidebar
 16 | st.sidebar.title("Search settings")
 17 | 
 18 | # general settings
 19 | st.sidebar.write("We recommend using time-consuming enrich and cross-references "
 20 |                  "features only in console mode.")
 21 | enrich_col, cross_search_col = st.sidebar.columns(2)
 22 | enrich = enrich_col.checkbox("Enrich papers", value=False, help=cs.HELP_ENRICH)
 23 | cross_search = cross_search_col.checkbox("Cross-references", value=False, help=cs.HELP_CROSS_REF)
 24 | 
 25 | if enrich is True or cross_search is True:
 26 |     st.sidebar.info("We recommend using time-consuming enrich and" 
 27 |                     "cross-references features only in console mode.")
 28 | 
 29 | # publication types
 30 | pub_types = st.sidebar.multiselect("Select one or more publication types:",
 31 |                                    options=cs.AVAILABLE_PUBTYPES,
 32 |                                    default=cs.DEFAULT_PUBTYPES)
 33 | pub_types = None if pub_types == '' else pub_types
 34 | 
 35 | # API keys
 36 | st.sidebar.subheader("Please enter the following API keys")
 37 | ieee_api_key = st.sidebar.text_input("IEEE API key", type="password")
 38 | scopus_api_key = st.sidebar.text_input("Scopus API key", type="password")
 39 | 
 40 | # replace empty keys
 41 | ieee_api_key = None if ieee_api_key == '' else ieee_api_key
 42 | scopus_api_key = None if scopus_api_key == '' else scopus_api_key
 43 | 
 44 | if scopus_api_key is None:
 45 |     st.sidebar.info("If you do not have an API key for scopus,"
 46 |                     " it can be obtained from "
 47 |                     "[here](https://dev.elsevier.com/)")
 48 | 
 49 | # result limits
 50 | st.sidebar.subheader("Maximum number of papers")
 51 | limit = st.sidebar.slider("Please select the maximum number of papers per database.",
 52 |                           min_value=cs.RESULTS_MIN_SLIDER,
 53 |                           max_value=cs.RESULTS_MAX_SLIDER,
 54 |                           value=cs.RESULTS_DEFAULT_SLIDER)
 55 | 
 56 | # Duplication threshold here inverse definition
 57 | st.sidebar.subheader("Duplication sensitivity")
 58 | senitivity = st.sidebar.slider("Please select the maximum number of papers per database.",
 59 |                                min_value=cs.DUPLICATION_MIN_SLIDER,
 60 |                                max_value=cs.DUPLICATION_MAX_SLIDER,
 61 |                                step=cs.DUPLICATION_STEP_SLIDER)
 62 | similarity_threshold = 1 - (senitivity - cs.DUPLICATION_MIN_SLIDER)
 63 | 
 64 | # database selection
 65 | st.subheader("Select the Database(s)")
 66 | container = st.container()
 67 | all_db_selected = st.checkbox("Select all", value=True)
 68 | if all_db_selected:
 69 |     databases = container.multiselect("Select one or more Databases:",
 70 |                                       options=cs.AVAILABLE_DATABASES,
 71 |                                       default=cs.AVAILABLE_DATABASES)
 72 | else:
 73 |     databases = container.multiselect("Select one or more Databases:",
 74 |                                       options=cs.AVAILABLE_DATABASES)
 75 | 
 76 | st.session_state.databases = databases
 77 | 
 78 | # date picker
 79 | st.subheader("Publication Date :calendar:")
 80 | start_date_col, end_date_col = st.columns(2)
 81 | start_date = start_date_col.date_input("start date",
 82 |                                        datetime.date(2021, 10, 1))
 83 | end_date = end_date_col.date_input("end date")
 84 | 
 85 | # query
 86 | st.subheader("Search String")
 87 | search_str_type = st.selectbox(
 88 |     "How would you like to enter the search string?",
 89 |     cs.SEARCH_STRING_TYPE
 90 | )
 91 | 
 92 | if search_str_type == cs.SEARCH_STRING_TYPE[1]:
 93 |     search_str_txt = build_search_str()
 94 |     search_state = set_build_btns(search_str_txt)
 95 | elif search_str_type == cs.SEARCH_STRING_TYPE[0]:
 96 |     search_str_txt = single_search_str()
 97 |     search_state = set_single_btns(search_str_txt)
 98 | 
 99 | 
100 | search_string = get_search_str()
101 | 
102 | # search
103 | if search_state and search_string == "":
104 |     st.error("Please enter a search string")
105 | elif search_state and search_string != "":
106 |     st.write("Please wait till the results are obtained")
107 |     search = fp.search(None,
108 |                        search_string,
109 |                        start_date,
110 |                        end_date,
111 |                        limit=limit * len(databases),
112 |                        limit_per_database=limit,
113 |                        databases=databases,
114 |                        publication_types=pub_types,
115 |                        scopus_api_token=scopus_api_key,
116 |                        ieee_api_token=ieee_api_key,
117 |                        cross_reference_search=cross_search,
118 |                        enrich=enrich,
119 |                        similarity_threshold=similarity_threshold)
120 | 
121 |     # process search results
122 |     result_json = convert_search_to_json(search)
123 |     search_export = fp.RayyanExport(search)
124 |     rayyan_file, rayyan_df = search_export.generate_rayyan_csv()
125 |     ris = fp.RisExport(search)
126 |     ris_file, ris_df = ris.generate_ris()
127 | 
128 |     # store session data
129 |     if 'review' not in st.session_state:
130 |         st.session_state.search = search
131 |         st.session_state.ris_df = ris_df.copy()
132 |         st.session_state.rayyan_df = rayyan_df.copy()
133 |         st.session_state.review = ris_df.copy()
134 |         st.session_state.review.insert(1, 'criteria', 'default')
135 |         st.session_state.review.insert(1, 'decision', True)
136 |         st.session_state.review.insert(1, 'reviewed', False)
137 | 
138 |     else:
139 |         st.info("Override results!!!")
140 |         if st.button("Yes I'm ready to override"):
141 |             st.session_state.search = search
142 |             st.session_state.ris_df = ris_df.copy()
143 |             st.session_state.rayyan_df = rayyan_df.copy()
144 |             st.session_state.review = ris_df.copy()
145 |             st.session_state.review.insert(1, 'criteria', 'default')
146 |             st.session_state.review.insert(1, 'decision', True)
147 |             st.session_state.review.insert(1, 'reviewed', False)
148 | 
149 |     # display results
150 |     st.dataframe(ris_df)
151 | 
152 |     # download results
153 |     st.subheader("Download")
154 |     download_json, download_ris, download_csv, = st.columns(3)
155 |     download_json.download_button(label='Details - JSON',
156 |                                   data=result_json,
157 |                                   file_name='set_you_free_results.json',
158 |                                   mime='text/plain')
159 |     download_ris.download_button(label='CADIMA - RIS',
160 |                                  data=ris_file,
161 |                                  file_name='set_you_free_cadima.ris',
162 |                                  mime='text/plain')             
163 |     download_csv.download_button(label='Rayyan - CSV',
164 |                                  data=rayyan_file,
165 |                                  file_name='set_you_free_rayyan.csv',
166 |                                  mime='text/csv')
167 | 


--------------------------------------------------------------------------------
/src/pages/2_2️⃣_study_selection.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import pandas as pd
 3 | from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode, DataReturnMode
 4 | from utils.site_config import set_page_title
 5 | 
 6 | 
 7 | # configure page
 8 | set_page_title("Study selection")
 9 | 
10 | st.subheader("Manual selection of publications")
11 | 
12 | if 'review' not in st.session_state:
13 |     st.error("Please run the search first.")
14 | else:
15 |     # sidebar
16 |     st.sidebar.title("Inspection settings")
17 | 
18 |     # criteria
19 |     criterias = st.sidebar.multiselect("Select one or more criterias:",
20 |                                        options=['default'],
21 |                                        default='default')
22 |     criterias = 'default' if criterias == '' else criterias
23 | 
24 |     st.sidebar.info(
25 |         f"Reviewed papers: {len(st.session_state.review[st.session_state.review.reviewed == True])} of "
26 |         f"{len(st.session_state.review)}"
27 |     )
28 | 
29 |     # select study
30 |     st.info(
31 |         "You can use the checkbox in the table (see doi) to select a publication. "
32 |         "The decision column describes which publications are included in your results."
33 |     )
34 | 
35 |     gb = GridOptionsBuilder.from_dataframe(st.session_state.review)
36 |     gb.configure_column(field='custom1', editable=True)
37 |     gb.configure_column(field='custom2', editable=True)
38 |     gb.configure_column(field='abstract', hide=True)
39 |     #gb.configure_column(field='title', pinned='left')
40 |     gb.configure_column(field='doi', pinned='left', checkboxSelection=True)
41 |     gb.configure_selection('single')  # use_checkbox=True
42 |     gb.configure_grid_options(stopEditingWhenCellsLoseFocus=True)
43 |     gb.configure_pagination(paginationAutoPageSize=True)
44 |     build_gb = gb.build()
45 |     grid = AgGrid(
46 |         dataframe=st.session_state.review,
47 |         width='100%',
48 |         update_mode=GridUpdateMode.__members__['MODEL_CHANGED'],
49 |         data_return_mode=DataReturnMode.__members__['AS_INPUT'],
50 |         gridOptions=build_gb,
51 |         fit_columns_on_grid_load=False,
52 |         theme='streamlit',
53 |         enable_enterprise_modules=True)
54 | 
55 |     st.session_state.review = grid['data']
56 |     selected = grid['selected_rows']
57 |     selected_df = pd.DataFrame(selected)
58 | 
59 |     with st.spinner("Load publication..."):
60 |         if not selected_df.empty:
61 |             st.markdown(f"## {selected_df.loc[0, 'title']} \n"
62 |                         f"***{selected_df.loc[0, 'doi']}***")
63 |             st.markdown("## Abstract \n"
64 |                         f"{selected_df.loc[0, 'abstract']}")
65 | 
66 |             # exclusion
67 |             st.subheader("Decision...")
68 | 
69 |             exclude_col, submit_col = st.columns(2)
70 | 
71 |             exclude = exclude_col.checkbox(f"Exclude because: {criterias}", value=False)
72 | 
73 |             submit = submit_col.button("submit")
74 | 
75 |             if submit:
76 |                 st.session_state.review.loc[
77 |                     st.session_state.review.id == selected_df.loc[0, 'id'],
78 |                     'reviewed'
79 |                 ] = True
80 |                 st.session_state.review.loc[
81 |                     st.session_state.review.id == selected_df.loc[0, 'id'],
82 |                     'decision'
83 |                 ] = not exclude
84 | 
85 |         
86 | 
87 | 


--------------------------------------------------------------------------------
/src/pages/3_3️⃣_results.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import streamlit as st
 3 | import graphviz as graphviz
 4 | import matplotlib.pyplot as plt
 5 | import findpapers as fp
 6 | 
 7 | from datetime import datetime
 8 | from matplotlib_venn import venn2, venn3
 9 | from utils.site_config import set_page_title
10 | from utils.search_engine import convert_search_to_json
11 | 
12 | # configure page
13 | set_page_title("Results of search")
14 | 
15 | if 'rayyan_df' not in st.session_state:
16 |     st.error("Please run the search first.")
17 | elif 'review' in st.session_state:
18 |     review = st.session_state.review.copy()
19 |     final_search = st.session_state.search
20 |     papers = review.loc[review['decision'] == False, ['title', 'date', 'doi']]
21 | 
22 |     if not papers.empty:
23 |         for _, p in papers.iterrows():
24 |             date = datetime.strptime(p.date, '%Y-%m-%d')
25 |             paper_key = final_search.get_paper_key(
26 |                 p.title, date, p.doi)
27 |             paper = final_search.paper_by_key.get(paper_key, None)
28 | 
29 |             # paper = final_search.get_paper(p.title, date)
30 |             if paper is not None:
31 |                 final_search.remove_paper(paper)
32 | 
33 |     result_json = convert_search_to_json(final_search)
34 |     search_export = fp.RayyanExport(final_search)
35 |     rayyan_file, rayyan_df = search_export.generate_rayyan_csv()
36 |     ris = fp.RisExport(final_search)
37 |     ris_file, ris_df = ris.generate_ris()
38 | 
39 |     st.subheader('PRISMA')
40 |     rayyan_selection = st.session_state.rayyan_df
41 |     databases = st.session_state.databases
42 |     all_papers = rayyan_selection.explode('databases')
43 |     stats_databses = all_papers.groupby(['databases'])['key'].apply(list)
44 |     n_duplicates = len(all_papers) - len(rayyan_selection)
45 |     n_removes = len(rayyan_selection) - len(rayyan_df)
46 | 
47 |     prisma = graphviz.Digraph('PRISMA')
48 |     prisma.attr('node', shape='box')
49 |     prisma.node('Identification')
50 |     prisma.node('Auto screening', f' {len(rayyan_selection)} records after duplicate removal')
51 |     prisma.node('Manual screening', f' {len(rayyan_df)} records after manual screeening')
52 |     prisma.edge('Identification', 'Auto screening', label=str(n_duplicates))
53 |     prisma.edge('Auto screening', 'Manual screening', label=str(n_removes))
54 | 
55 |     prisma1_col, prisma2_col = st.columns(2)
56 |     if len(databases) == 2:
57 |         venn2([set(stats_databses[databases[0]]),
58 |                set(stats_databses[databases[1]])],
59 |               set_labels=databases)
60 |         prisma1_col.graphviz_chart(prisma)
61 |         prisma2_col.pyplot(plt)
62 |     elif len(databases) == 3:
63 |         venn3([set(stats_databses[databases[0]]),
64 |                set(stats_databses[databases[1]]),
65 |                set(stats_databses[databases[2]])],
66 |               set_labels=databases)
67 |         prisma1_col.graphviz_chart(prisma)
68 |         prisma2_col.pyplot(plt)
69 |     else:
70 |         matches = []
71 |         prisma1_col.graphviz_chart(prisma)
72 |         unique_databases = [list(x) for x in set(tuple(x) for x in rayyan_df['databases'])]
73 |         for n in unique_databases:
74 |             matches.append(sum([n == i for i in rayyan_selection['databases']]))
75 |         y_pos = np.arange(len(unique_databases))
76 |         plt.bar(y_pos, matches)
77 |         plt.xticks(y_pos, unique_databases)
78 |         prisma2_col.pyplot(plt)
79 | 
80 | 
81 | 
82 |     # download review
83 |     st.subheader("Download review")
84 | 
85 |     download_json, download_ris, download_csv, = st.columns(3)
86 |     download_json.download_button(label='Details - JSON',
87 |                                   data=result_json,
88 |                                   file_name='set_you_free_results.json',
89 |                                   mime='text/plain')
90 |     download_ris.download_button(label='CADIMA - RIS',
91 |                                  data=ris_file,
92 |                                  file_name='set_you_free_cadima.ris',
93 |                                  mime='text/plain')                   
94 |     download_csv.download_button(label='Rayyan - CSV',
95 |                                  data=rayyan_file,
96 |                                  file_name='set_you_free_rayyan.csv',
97 |                                  mime='text/csv')
98 | 


--------------------------------------------------------------------------------
/src/pages/md/home.md:
--------------------------------------------------------------------------------
 1 | ## main
 2 | ## Set You Free
 3 | 
 4 | Automated search across multiple databases and preprint servers to save your time 
 5 | during structured literature search and review.
 6 | 
 7 | ## Features
 8 | * Coherent structure across multiple databases and preprint servers
 9 | * Cross-reference search based on the references of your findings
10 | * Export to advanced structured literature engines such as Rayyan or [cadima](https://www.cadima.info/)
11 | * Excludes most duplicates across databases
12 | * Manual selection based on publication details (see PRISMA)
13 | 
14 | 
15 | ### Authors
16 | 
17 | Christian Gerloff, Leon Lotter, Kashyap Maheshwari
18 | 
19 | ### How to cite
20 | If you use `Set You Free` please cite (see Zenodo):
21 | 
22 | 
23 | Gerloff C., Lotter L., & Maheshwari K. (2020). Set You Free: Automated Structured Literature Search.
24 | 
25 | 
26 | 


--------------------------------------------------------------------------------
/src/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChristianGerloff/set-you-free/f0752d468619456f97c83ee1d2456dfa4447f2b4/src/tests/__init__.py


--------------------------------------------------------------------------------
/src/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChristianGerloff/set-you-free/f0752d468619456f97c83ee1d2456dfa4447f2b4/src/utils/__init__.py


--------------------------------------------------------------------------------
/src/utils/consts.py:
--------------------------------------------------------------------------------
 1 | """Defines constants used in the project."""
 2 | 
 3 | DUPLICATION_MIN_SLIDER = 0.75
 4 | DUPLICATION_MAX_SLIDER = 1.0
 5 | DUPLICATION_STEP_SLIDER = 0.01
 6 | RESULTS_MIN_SLIDER = 1
 7 | RESULTS_MAX_SLIDER = 1000
 8 | RESULTS_DEFAULT_SLIDER = 10
 9 | AVAILABLE_DATABASES = [
10 |     "ACM",
11 |     "arXiv",
12 |     "bioRxiv",
13 |     "IEEE",
14 |     "medRxiv",
15 |     "PubMed",
16 |     "Scopus"
17 | ]
18 | JOIN_TYPES = [
19 |     "None",
20 |     "(",
21 |     ")",
22 |     "AND",
23 |     "OR",
24 |     "(AND",
25 |     "(OR"
26 | ]
27 | SEARCH_STRING_TYPE = [
28 |     "Insert search string directly",
29 |     "Build search string"
30 | ]
31 | AVAILABLE_PUBTYPES = ['journal', 'preprint', 'conference', 'book']
32 | DEFAULT_PUBTYPES = ['journal', 'preprint']
33 | HELP_ENRICH = (
34 |     "Enrich aims to combine information across different databases to complete "
35 |     "missing information of a publication."
36 | )
37 | HELP_CROSS_REF = (
38 |     "The cross-reference option uses the reference list and the citations "
39 |     "of the found publications to extend the search results."
40 | )
41 | HELP_SEARCH_STRING = (
42 |     "[term a] OR ([term b] AND ([term c] OR [term d]"
43 | )


--------------------------------------------------------------------------------
/src/utils/search_engine.py:
--------------------------------------------------------------------------------
  1 | """Search functionalities definition."""
  2 | 
  3 | import json
  4 | import streamlit as st
  5 | import findpapers as fp
  6 | import utils.consts as cs
  7 | 
  8 | 
  9 | @st.cache
 10 | def convert_search_to_json(search: fp.models.search):
 11 |     """Cachs the converted search results
 12 | 
 13 |     Args:
 14 |         search (findpapers.models.search): search results
 15 | 
 16 |     Returns:
 17 |         json (meme): encoded json
 18 |     """
 19 |     results = fp.models.search.Search.to_dict(search)
 20 |     result = json.dumps(results, indent=2, sort_keys=True)
 21 |     return result
 22 | 
 23 | 
 24 | def join_string_in_list(list_of_string: list) -> str:
 25 |     """Joins the list of queries into one complete query.
 26 | 
 27 |     Args:
 28 |         list_of_string (list): List of queries.
 29 | 
 30 |     Returns:
 31 |         str: All queries combined.
 32 |     """
 33 |     return ' '.join(list_of_string)
 34 | 
 35 | 
 36 | def get_search_str():
 37 |     """Get the search string.
 38 | 
 39 |     Returns:
 40 |         str: search string.
 41 |     """
 42 |     if isinstance(st.session_state.query_string, list):
 43 |         query = st.session_state.query_string
 44 |         search_string = join_string_in_list(query)
 45 |     else:
 46 |         search_string = st.session_state.query_string
 47 |     return search_string
 48 | 
 49 | 
 50 | def build_search_str():
 51 |     """Builds the search string.
 52 | 
 53 |     Returns:
 54 |         str: Search string.
 55 |     """
 56 |     search_str_col, operator_col = st.columns([3, 1])
 57 |     search_string = search_str_col.text_input("Please enter the search string (e.g., fNIRS)", "")
 58 |     operator = operator_col.selectbox("Please select how to join your search strings",
 59 |                                       cs.JOIN_TYPES)
 60 |     add_button = st.button("Add")
 61 | 
 62 |     if "query_string" not in st.session_state:
 63 |         st.session_state.query_string = []
 64 | 
 65 |     if add_button and search_string == "":
 66 |         st.error("The search query can not be empty.")
 67 | 
 68 |     # append search string current session data
 69 |     elif add_button and search_string != "":
 70 |         st.session_state.query_string.append(f"[{search_string}]")
 71 |         if operator != "None":
 72 |             st.session_state.query_string.append(operator)
 73 | 
 74 |     search_str_txt = st.empty()
 75 |     if search_string != "":
 76 |         search_string = join_string_in_list(st.session_state.query_string)
 77 |         search_str_txt.write(search_string)
 78 |     return search_str_txt
 79 | 
 80 | 
 81 | def single_search_str():
 82 |     """Single search string definition.
 83 | 
 84 |     Returns:
 85 |         str: Search string.
 86 |     """
 87 |     if "query_string" not in st.session_state:
 88 |         st.session_state.query_string = []
 89 | 
 90 |     search_string = st.text_input(
 91 |         "Please enter the search string (e.g., [ASD] AND [fMRI])",
 92 |         "",
 93 |         help=cs.HELP_SEARCH_STRING)
 94 |     add_button = st.button("Add")
 95 | 
 96 |     if add_button and search_string == "":
 97 |         st.error("Please enter a search string (e.g., [ASD] AND [fMRI])")
 98 | 
 99 |     search_str_txt = st.empty()
100 |     if search_string != "":
101 |         search_str_txt.write(search_string)
102 |         st.session_state.query_string = search_string
103 | 
104 |     return search_str_txt
105 | 
106 | 
def clear_search_str(search_str_txt, clear_all: bool = False):
    """Clear the stored query completely or drop its last part.

    Args:
        search_str_txt (streamlit label): Search string label
        clear_all (bool, optional): Clear the whole query. If False, only
            the last part of a query built as a list is removed.
            Defaults to False.
    """
    query = st.session_state.query_string
    if isinstance(query, str):
        # A directly entered query is one string; it can only be reset as a
        # whole (a str does not support deleting items).
        st.session_state.query_string = ""
    elif clear_all:
        # "Clear all" always empties the query, whatever its last parts are.
        query.clear()
    elif query:
        # remove the most recently added part (term or operator)
        del query[-1:]
    search_string = join_string_in_list(st.session_state.query_string)
    search_str_txt.write(search_string)
129 | 
130 | 
def set_build_btns(search_str_txt) -> bool:
    """Render the "Search", "Clear all" and "Remove the last query" buttons.

    A click on one of the two clean-up buttons is forwarded to
    ``clear_search_str``.

    Args:
        search_str_txt (streamlit label): Search string label

    Returns:
        bool: search button state
    """
    columns = st.columns(3)
    search_clicked = columns[0].button("Search")
    clear_clicked = columns[1].button("Clear all")
    remove_clicked = columns[2].button("Remove the last query")

    # (button state, value passed as clear_all)
    cleanup_actions = ((clear_clicked, True), (remove_clicked, False))
    for clicked, wipe_everything in cleanup_actions:
        if clicked:
            clear_search_str(search_str_txt, clear_all=wipe_everything)

    return search_clicked
153 | 
154 | 
def set_single_btns(search_str_txt):
    """Render the "Search" and "Clear all" buttons of the direct-entry mode.

    Args:
        search_str_txt (streamlit label): Search string label

    Returns:
        bool: search button state
    """
    left_col, right_col = st.columns(2)
    search_clicked = left_col.button("Search")

    # a click on "Clear all" resets the stored query
    if right_col.button("Clear all"):
        clear_search_str(search_str_txt, clear_all=True)
    return search_clicked
172 | 


--------------------------------------------------------------------------------
/src/utils/site_config.py:
--------------------------------------------------------------------------------
 1 | """General site settings"""
 2 | 
 3 | import streamlit as st
 4 | 
 5 | 
 6 | def set_page_title(title: str = "Set You Free"):
 7 |     """Set the page title.
 8 | 
 9 |     Args:
10 |         title (str, optional): The title. Defaults to "Set You Free".
11 |     """
12 |     st.set_page_config(page_title=title,
13 |                        page_icon="📊",
14 |                        initial_sidebar_state="expanded")
15 |     set_page_style()
16 | 
17 | 
def set_page_style():
    """Inject CSS that hides the ``#MainMenu`` element and the footer."""
    # raw HTML, therefore rendered with unsafe_allow_html
    css_snippet = """
                <style>
                #MainMenu {visibility: hidden;}
                footer {visibility: hidden;}
                </style>
                """
    st.markdown(css_snippet, unsafe_allow_html=True)
28 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChristianGerloff/set-you-free/f0752d468619456f97c83ee1d2456dfa4447f2b4/tests/__init__.py


--------------------------------------------------------------------------------
/tests/test_main.py:
--------------------------------------------------------------------------------
# The function lives in utils/search_engine.py; there is no src/pages/main.py.
# The import is resolved relative to src/, which the CI workflow puts on
# PYTHONPATH.
from utils.search_engine import join_string_in_list


def test_join_string_in_list():
    """The parts of the query are joined with single spaces."""
    expected = "This is a test"
    list_to_test = ["This", "is", "a", "test"]
    assert join_string_in_list(list_to_test) == expected
8 | 


--------------------------------------------------------------------------------