├── .gitignore ├── pyproject.toml ├── images ├── main.png └── similarity.png ├── requirements.txt ├── .github └── dependabot.yml ├── .env.dist ├── main.py ├── README.md ├── LICENSE ├── Templates ├── report.html └── index.html ├── similarity.py └── websearch.py /.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | __pycache__ 3 | .env 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff.lint] 2 | select = ["ALL"] 3 | -------------------------------------------------------------------------------- /images/main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harirakul/Plagiarism-Detection/HEAD/images/main.png -------------------------------------------------------------------------------- /images/similarity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harirakul/Plagiarism-Detection/HEAD/images/similarity.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.13.3 2 | Flask==3.1.0 3 | nltk==3.9.1 4 | pandas==2.2.3 5 | requests==2.32.3 6 | loguru==0.7.3 7 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | -------------------------------------------------------------------------------- /.env.dist: -------------------------------------------------------------------------------- 1 | # Search system settings 2 | 3 | # 
"""Main module: starting app.

Defines the Flask application with two routes:

* ``/``       -- the input form (``Templates/index.html``);
* ``/report`` -- the similarity report for the submitted text.

When executed as a script, server settings (``DEBUG``, ``HOST``, ``PORT``)
are read from the environment after loading ``.env``.
"""

import os

from dotenv import load_dotenv
from flask import Flask, render_template, request

import similarity

app = Flask(__name__, template_folder="Templates")


@app.route("/", methods=["GET", "POST"])
def main_page() -> str:
    """Render and return main page."""
    return render_template("index.html")


@app.route("/report", methods=["POST", "GET"])
def report_page() -> str:
    """Render and return report page.

    The text to check is read from the ``text`` form field. When the field
    is missing (e.g. a plain GET request) or blank, the input form is
    rendered again instead of failing with a 400 error or building a
    report for empty text.
    """
    # ``.get`` avoids the BadRequestKeyError that ``request.form["text"]``
    # raises when the route is hit without form data.
    text = request.form.get("text", "")
    if not text.strip():
        return render_template("index.html")

    # The results table is appended after the rendered report template,
    # exactly as before.
    return render_template("report.html") + similarity.return_table(
        similarity.report(text),
    )


def main() -> None:
    """Load settings from ``.env`` / the environment and start the server."""
    # Loading consts from .env
    load_dotenv()

    # Unset or empty values fall back to safe defaults rather than crashing
    # on ``None.lower()`` or ``int("")``.
    is_debug = (os.getenv("DEBUG") or "").strip().lower() == "true"
    host = os.getenv("HOST") or None  # None -> Flask's default host
    port = int(os.getenv("PORT") or 5000)  # 5000 is Flask's default port

    # Starting flask app
    app.run(debug=is_debug, host=host, port=port)


if __name__ == "__main__":
    main()
Clone repository: 12 | 13 | ``` 14 | git clone https://github.com/harirakul/Plagiarism-Detection.git 15 | ``` 16 | 17 | 2. Create and activate venv: 18 | 19 | ``` 20 | python3 -m venv venv 21 | . venv/bin/activate 22 | ``` 23 | 24 | 3. Install requirements: 25 | 26 | ``` 27 | pip install -r requirements.txt 28 | ``` 29 | 30 | 4. Rename `.env.dist` to `.env`. 31 | 32 | > For better results, fill in `COOKIE` in `.env`. You can copy its value from the dev tools in your browser. 33 | 34 | 5. Start web-app: 35 | 36 | ``` 37 | python3 main.py 38 | ``` 39 | 40 | ## Website Link: 41 | http://plagiarism-detection--hariambethkar.repl.co/ 42 | 43 | ## How To: 44 | - Click on the link to the website 45 | - Enter or paste your text in the input field 46 | - Click on `Generate Report` 47 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 harirakul 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Templates/report.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
5 | 6 | 7 |You will be redirected once your report is generated.
69 |Time to generate report depends on length of text.
70 |