├── .all-contributorsrc ├── .github ├── FUNDING.yml └── ISSUE_TEMPLATE │ ├── ----bug-report.md │ ├── ---feature-request.md │ └── ---say-thank-you.md ├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── _config.yml ├── demo.py ├── docs ├── about.md ├── changelog.md ├── index.md ├── insights.md ├── installation.md ├── moss.md └── usage.md ├── mkdocs.yml ├── plagcheck ├── __init__.py ├── analyze.py ├── plagcheck.py └── plagcheck_test.py ├── requirements-dev.txt ├── setup.py └── testfiles ├── test_java.java ├── test_java2.java ├── test_java3.java ├── test_java4.java ├── test_java5.java ├── test_java6.java ├── test_java7.java ├── test_python.py ├── test_python2.py └── test_python3.py /.all-contributorsrc: -------------------------------------------------------------------------------- 1 | { 2 | "files": [ 3 | "README.md" 4 | ], 5 | "imageSize": 100, 6 | "commit": false, 7 | "contributors": [ 8 | { 9 | "login": "vhsw", 10 | "name": "Alexey Dubrov", 11 | "avatar_url": "https://avatars3.githubusercontent.com/u/7099976?v=4", 12 | "profile": "https://github.com/vhsw", 13 | "contributions": [ 14 | "code", 15 | "bug", 16 | "test" 17 | ] 18 | } 19 | ], 20 | "contributorsPerLine": 7, 21 | "projectName": "PlagCheck", 22 | "projectOwner": "codeclassroom", 23 | "repoType": "github", 24 | "repoHost": "https://github.com" 25 | } 26 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: bhupesh 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a 
single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/----bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F41B Bug report" 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: 'bug' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Additional context** 32 | Add any other context about the problem here. 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/---feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F680 Feature request" 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 
12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/---say-thank-you.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F49F Say thank you" 3 | about: Just say thanks if you liked PlagCheck 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | if you liked **PlagCheck** - please let us know. We'd love to hear from you! 11 | 12 | You can help me in any way possible 13 | 14 | - [ ] Give the repository a star ⭐️. 15 | - [ ] Help out with issues. 16 | - [ ] Share it with others. 17 | - [ ] Support me on [Patreon](https://www.patreon.com/bePatron?u=18082750). 18 | 19 | Thank you! 💐 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # custom 107 | /submission 108 | sample.py -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | - "3.7" 5 | - "3.8" 6 | 7 | install: 8 | - pip install -r requirements-dev.txt 9 | - pip install isort black flake8 pylint 10 | - pip install coveralls 11 | script: 12 | - pytest plagcheck 13 | - py.test plagcheck/plagcheck_test.py 14 | - isort --check-only --recursive plagcheck 15 | - black --check --diff plagcheck 16 | - flake8 plagcheck --max-line-length=88 --ignore=F401 17 | - pylint plagcheck --disable=bad-continuation,invalid-name,attribute-defined-outside-init,no-self-use,too-many-locals,too-few-public-methods 18 | after_success: 19 | - coveralls 20 | 
-------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | 4 | ## [0.4] - March 10, 2020 5 | 6 | ### Changed [⚠️ Breaking Changes] 7 | - `getShareScores` & `getInsights` have been decoupled from the check class, they now have to be imported separately. 8 | - Minor changes in the `analyze.py` module. 9 | 10 | 11 | ## [0.3] - Jan 1, 2020 12 | 13 | ### Added 14 | 15 | - New module `analyze.py` for Moss Results analysis 16 | - `getShareScores()` for returning frequency of shared files. 17 | - `addFile()` for adding files. 18 | - `addFilesByWildCard()` for submitting multiple files. 19 | - Support for adding base code using `addBaseCode()`. 20 | 21 | ### Changed 22 | - The plagcheck module is now more modularised. `check` is now a class. 23 | - `__get_line_numbers()` now runs in a new thread. 24 | 25 | ### Removed 26 | - `requests` as a dependency, network requests are now 50% faster. 27 | 28 | 29 | ## [0.2] - Nov 9, 2019 30 | - Minor Improvements 31 | 32 | 33 | ## [0.1] - Nov 3, 2019 34 | - Initial Release 35 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to PlagCheck 2 | 3 | :+1::tada: First off, thanks for taking the time to contribute! :tada::+1: 4 | 5 | Make sure you follow below guidelines before contributing. 6 | 7 | 1. Raise an issue before sending any PR. 8 | 2. Make your changes to the `feature` branch. 9 | 3. See if there is already an open PR for the same issue. 
10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Bhupesh Varshney 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | global-exclude *_test.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PlagCheck ✅ 2 | 3 | > Moss Results scraper with powerful insights & analysis 💡 4 | 5 | ![PyPI](https://img.shields.io/pypi/v/plagcheck?color=blue) 6 | [![Build Status](https://travis-ci.org/codeclassroom/PlagCheck.svg?branch=master)](https://travis-ci.org/codeclassroom/PlagCheck) 7 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/plagcheck) 8 | [![Documentation Status](https://readthedocs.org/projects/plagcheck/badge/?version=latest)](https://plagcheck.readthedocs.io/en/latest/?badge=latest) 9 | ![PyPI - License](https://img.shields.io/pypi/l/plagcheck?color=orange) 10 | ![PyPI - Downloads](https://img.shields.io/pypi/dm/plagcheck?color=blue) 11 | 12 | 13 | ## Installation 14 | 15 | Install using `pip` from PyPI 16 | 17 | ```bash 18 | pip install plagcheck 19 | ``` 20 | 21 | or directly from GitHub if you cannot wait to test new features 22 | 23 | ```bash 24 | pip install git+https://github.com/codeclassroom/PlagCheck.git 25 | ``` 26 | 27 | ## Usage 28 | 29 | ```python 30 | 31 | """Usage example""" 32 | import os 33 | import pprint 34 | from plagcheck.plagcheck import check, insights, share_scores 35 | 36 | from dotenv import load_dotenv 37 | load_dotenv() 38 | 39 | language = "java" 40 | userid = os.environ["USER_ID"] 41 | 42 | 43 | moss = check(language, userid) 44 | 45 | moss.addFilesByWildCard("testfiles/test_java*.java") 46 | 47 | # or moss.addFile("testfiles/test_python.py") 48 | 49 | moss.submit() 50 | 51 | print(moss.getHomePage()) 52 | 53 | result = moss.getResults() 54 | 55 | pprint.pprint(result) 56 | 57 | # print potential distributor-culprit 
relationships 58 | pprint.pprint(insights(result)) 59 | # print frequency of each shared solution 60 | pprint.pprint(share_scores(result)) 61 | 62 | ``` 63 | 64 | ## Documentation 65 | 66 | > [PlagCheck Documentation](https://plagcheck.readthedocs.io/en/latest/) 67 | 68 | 69 | ## Development 70 | 71 | ##### Prerequisites 72 | - Python 3.6+ 73 | - virtualenv 74 | 75 | 1. Create virtual environment. 76 | ```bash 77 | virtualenv -p python3 venv && cd venv && source bin/activate 78 | ``` 79 | 2. Clone the repository. 80 | ```bash 81 | git clone https://github.com/codeclassroom/PlagCheck.git 82 | ``` 83 | 3. Install Dependencies. 84 | ```bash 85 | pip install -r requirements-dev.txt 86 | ``` 87 | 4. Run tests. 88 | ```bash 89 | pytest plagcheck 90 | ``` 91 | 5. Lint the project with 92 | ```bash 93 | flake8 plagcheck --max-line-length=88 --ignore=F401 94 | black --check --diff plagcheck 95 | ``` 96 | 97 | ## 📝 Changelog 98 | 99 | See the [CHANGELOG.md](CHANGELOG.md) file for details. 100 | 101 | 102 | ## Author 103 | 104 | 👥 **Bhupesh Varshney** 105 | 106 | - Twitter: [@bhupeshimself](https://twitter.com/bhupeshimself) 107 | - DEV: [bhupesh](https://dev.to/bhupesh) 108 | 109 | [![forthebadge](https://forthebadge.com/images/badges/built-with-love.svg)](https://forthebadge.com) 110 | 111 | ## 📜 License 112 | 113 | This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details. 114 | 115 | ## 👋 Contributing 116 | 117 | Please read the [CONTRIBUTING](CONTRIBUTING.md) guidelines for the process of submitting pull requests to us. 118 | 119 | 120 | ## Contributors ✨ 121 | 122 | Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)): 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 |
Alexey Dubrov
Alexey Dubrov

💻 🐛 ⚠️
131 | 132 | 133 | 134 | This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome! 135 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-merlot 2 | show_downloads: true 3 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | """Usage example""" 2 | import os 3 | import pprint 4 | from plagcheck.plagcheck import check, insights, share_scores 5 | 6 | from dotenv import load_dotenv 7 | load_dotenv() 8 | 9 | language = "java" 10 | userid = os.environ["USER_ID"] 11 | 12 | 13 | moss = check(language, userid) 14 | 15 | moss.addFilesByWildCard("testfiles/test_java*.java") 16 | 17 | # or moss.addFile("testfiles/test_python.py") 18 | 19 | moss.submit() 20 | 21 | print(moss.getHomePage()) 22 | 23 | result = moss.getResults() 24 | 25 | pprint.pprint(result) 26 | 27 | # print potential distributor-culprit relationships 28 | pprint.pprint(insights(result)) 29 | # print frequency of each shared solution 30 | pprint.pprint(share_scores(result)) 31 | -------------------------------------------------------------------------------- /docs/about.md: -------------------------------------------------------------------------------- 1 | ## About 2 | 3 | plagcheck was built by : 4 | 5 | 👥 **Bhupesh Varshney** 6 | 7 | - Twitter: [@bhupeshimself](https://twitter.com/bhupeshimself) 8 | - DEV: [bhupesh](https://dev.to/bhupesh) 9 | - GitHub: [Bhupesh-V](https://github.com/Bhupesh-V) 10 | 11 | ## 📝 License 12 | 13 | This project is licensed under the MIT License. See the [LICENSE](https://github.com/codeclassroom/PlagCheck/blob/master/LICENSE) file for details. 
14 | 15 | ## 👋 Contributing 16 | 17 | Please read the [CONTRIBUTING](https://github.com/codeclassroom/PlagCheck/blob/master/CONTRIBUTING.md) guidelines for the process of submitting pull requests to us. -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | 4 | ## [0.4] - March 10, 2020 5 | 6 | ### Changed [⚠️ Breaking Changes] 7 | - `getShareScores` & `getInsights` have been decoupled from the check class, they now have to be imported separately. 8 | - Minor changes in the `analyze.py` module. 9 | 10 | 11 | ## [0.3] - Jan 1, 2020 12 | 13 | ### Added 14 | 15 | - New module `analyze.py` for Moss Results analysis 16 | - `getShareScores()` for returning frequency of shared files. 17 | - `addFile()` for adding files. 18 | - `addFilesByWildCard()` for submitting multiple files. 19 | - Support for adding base code using `addBaseCode()`. 20 | 21 | ### Changed 22 | - The plagcheck module is now more modularised. `check` is now a class. 23 | - `__get_line_numbers()` now runs in a new thread. 24 | 25 | ### Removed 26 | - `requests` as a dependency, network requests are now 50% faster. 
27 | 28 | 29 | ## [0.2] - Nov 9, 2019 30 | - Minor Improvements 31 | 32 | 33 | ## [0.1] - Nov 3, 2019 34 | - Initial Release 35 | 36 | # Releases 37 | See releases on [PyPi](https://pypi.org/project/plagcheck/#history) -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # PlagCheck ✅ 2 | 3 | > Moss Results scraper with powerful insights & analysis 💡 4 | 5 | ![PyPI](https://img.shields.io/pypi/v/plagcheck?color=blue) 6 | [![Build Status](https://travis-ci.org/codeclassroom/PlagCheck.svg?branch=master)](https://travis-ci.org/codeclassroom/PlagCheck) 7 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/plagcheck) 8 | [![Documentation Status](https://readthedocs.org/projects/plagcheck/badge/?version=latest)](https://plagcheck.readthedocs.io/en/latest/?badge=latest) 9 | ![PyPI - License](https://img.shields.io/pypi/l/plagcheck?color=orange) 10 | ![PyPI - Downloads](https://img.shields.io/pypi/dm/plagcheck?color=blue) -------------------------------------------------------------------------------- /docs/insights.md: -------------------------------------------------------------------------------- 1 | # Insights 2 | 3 | PlagCheck provides algorithmic analysis of Moss results. 4 | 5 | ### Terminologies 6 | 7 | ### 1. Node 8 | Nodes are results returned by Moss i.e every 9 | individual file. 10 | 11 | ### 2. Tags 12 | Tags are roles which a file serves i.e. a tag is 13 | a potential distributor or potential culprit or 14 | both. 15 | 16 | ### 3. M-group 17 | m-groups (moss-groups) are groups of solution which have similar code. 18 | For example A student who solves a programming problem may share their 19 | solution with 3 of his/her friends, that is a single m-group with 4 nodes. 
20 | 21 | For example if you run [demo.py](https://github.com/codeclassroom/PlagCheck/blob/master/demo.py), `insights()` will return the following data: 22 | ```java 23 | 24 | {'DCtoC Paths': [('testfiles/test_java5.java', 'testfiles/test_java2.java'), 25 | ('testfiles/test_java4.java', 'testfiles/test_java2.java')], 26 | 'DtoC Paths': [('testfiles/test_java3.java', 'testfiles/test_java2.java'), 27 | ('testfiles/test_java3.java', 'testfiles/test_java.java'), 28 | ('testfiles/test_java7.java', 'testfiles/test_java6.java')], 29 | 'DtoDC Paths': [('testfiles/test_java3.java', 'testfiles/test_java5.java'), 30 | ('testfiles/test_java3.java', 'testfiles/test_java4.java')]} 31 | 32 | ``` 33 | 34 | This analysis can be visualized into following _Disconnected Directed Graph_ 35 | 36 | ![moss results](https://drive.google.com/uc?export=view&id=1Lc8obgjihfo7EGimn300mTtqfmHK0Zem) 37 | 38 | We assign Tags to every individual Node. 39 | 40 | 1. D - Distributor 41 | Student(s) who distributed their 42 | code in a group. 43 | 2. C - Culprit 44 | Student(s) who copied the shared 45 | code. 46 | 3. DC - Both a Distributor & Culprit 47 | 48 | In the above depicted graph, there are 2 unique _m-groups_. 49 | 50 | 1. Group 1 : [1, 2, 3, 4, 5] 51 | 2. 
Group 2 : [7, 6] -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | Installing plagcheck is pretty simple, just run 4 | 5 | ```bash 6 | pip install plagcheck 7 | ``` 8 | 9 | Install a specific version 10 | 11 | ```bash 12 | pip install plagcheck==0.4 13 | ``` 14 | 15 | or directly from GitHub if you cannot wait to test new features 16 | 17 | ```bash 18 | pip install git+https://github.com/codeclassroom/PlagCheck.git 19 | ``` 20 | 21 | If you have an old version, update it using 22 | 23 | ```bash 24 | pip install --upgrade plagcheck 25 | ``` -------------------------------------------------------------------------------- /docs/moss.md: -------------------------------------------------------------------------------- 1 | ## Moss 2 | > Derived from [Reading the Results](http://moss.stanford.edu/general/format.html). 3 | 4 | 5 | 6 | ## [Tips](http://moss.stanford.edu/general/tips.html) 7 | 8 | - Moss is quite conservative about what it considers to be matching passages of code. If Moss says that two passages look alike, then they almost certainly look quite alike. Moss also excludes all code that appears in too many of the submitted programs. Thus, all matches reported by Moss fairly accurately approximate the signature of plagiarized code: a passage of similar code in two programs that does not also appear in very many other programs. 9 | 10 | - False positives are possible with Moss, as programs may legitimately share code (e.g., two programs making use of the same public-domain library). The higher-ranked pairs are more likely to be the result of plagiarism than the lower-ranked pairs. The recommended strategy is to start with the highest-ranked pair and work down the list until one finds that a large fraction of the reported matches are false positives. 
11 | 12 | - Moss can be more accurate if a base file is supplied. The -b option to Moss supplies a base file of code that should be ignored if it appears in programs; Moss never considers code that appears in a base file to match any other code. If your results include many unintended matches, then it is best to place all legitimately shared code in a base file (e.g., instructor-supplied code, common libraries, etc.) and resubmit the query to the server. 13 | 14 | - Moss detects structural similarities in programs and nothing more; it has no idea why programs may be structurally similar. As noted above, there are reasons besides plagiarism that two programs may appear the same (e.g., they are both based on the same third program, such as instructor-supplied code for an assignment). Results from Moss cannot be taken as direct evidence of plagiarism---it is still necessary for someone to examine the programs and make a judgment. 15 | 16 | ## Credits 17 | Moss was written and is maintained by Alex Aiken, aiken@cs.stanford.edu. 18 | 19 | The HTML interface was conceived of and designed by Guido Malpohl (s_malpoh@ira.uka.de), the author of JPlag, a plagiarism detection system for Java programs. 20 | 21 | PlagCheck extracts information from the webpages for easier storing & analysis of results. 22 | Contact [varshneybhupesh@gmail.com](mailto:varshneybhupesh@gmail.com) for more info. 23 | -------------------------------------------------------------------------------- /docs/usage.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | plagcheck provides the following classes & methods: 4 | 5 | ### check(lang, user_id) 6 | 7 | * **Parameters** : 8 | - lang : The Programming Language. 9 | - user_id : Moss UserID. 10 | - Program files are added afterwards with `addFile()` or `addFilesByWildCard()`. 
11 | 12 | 13 | **Demo**: 14 | ```python 15 | 16 | """Usage example""" 17 | import os 18 | import pprint 19 | from plagcheck.plagcheck import check, insights, share_scores 20 | 21 | from dotenv import load_dotenv 22 | load_dotenv() 23 | 24 | language = "java" 25 | userid = os.environ["USER_ID"] 26 | 27 | 28 | moss = check(language, userid) 29 | 30 | moss.addFilesByWildCard("testfiles/test_java*.java") 31 | 32 | # or moss.addFile("testfiles/test_python.py") 33 | 34 | moss.submit() 35 | 36 | print(moss.getHomePage()) 37 | 38 | result = moss.getResults() 39 | 40 | pprint.pprint(result) 41 | 42 | # print potential distributor-culprit relationships 43 | pprint.pprint(insights(result)) 44 | # print frequency of each shared solution 45 | pprint.pprint(share_scores(result)) 46 | 47 | ``` 48 | 49 | ### 1. submit() 50 | **Parameters** : `None`
51 | **Return Type** : `None`
52 | **Description**: Submits the program on Moss.
53 | **Demo**: 54 | ```python 55 | 56 | c.submit() 57 | 58 | ``` 59 | 60 | ### 2. getHomePage() 61 | **Parameters** : `None`
62 | **Return Type** : `String`
63 | **Description**: Returns the Moss Result HomePage URL
64 | **Demo**: 65 | ```python 66 | 67 | c.getHomePage() 68 | 69 | ``` 70 | 71 | ### 3. getResults() 72 | **Parameters** : `None`
73 | **Return Type** : `List`
74 | **Description**: Returns the scraped data from Moss Results.
75 | **Demo**: 76 | ```python 77 | 78 | c.getResults() 79 | 80 | ``` 81 | 82 | *getResults()* returns the following list of dictionaries: 83 | ```json 84 | [ 85 | { 86 | "file1":"filename1.py", 87 | "file2":"filename2.py", 88 | "percentage": 34, 89 | "no_of_lines_matched": 3, 90 | "lines_matched":[["2-3", "10-11"]] 91 | }, 92 | .... 93 | ] 94 | ``` 95 | Each dict item contains the following items: 96 | 97 | - **file1** & **file2** : 98 | The pair of file names that have similar code. 99 | 100 | - **percentage** : 101 | It is the percentage of the code in one file considered to match code in the other file. 102 | 103 | - **lines_matched** : 104 | Lines Matched is approximately the number of lines of code that matched between 2 given files. 105 | 106 | For example : 107 | If ***lines_matched* is [['88-99','119-131']]** 108 | 109 | Then the line numbers 88-99 of *file1* matched with lines 119-131 of *file2*. 110 | 111 | lines_matched is a list of lists indicating all line number matches between 2 code files. 112 | 113 | 114 | > For both measures(*lines_matched* & *percentage*), higher numbers mean more code matches. 115 | 116 | ### 4. addFilesByWildCard() 117 | **Parameters** : `String`
118 | **Return Type** : `None`
119 | **Description**: Add multiple files.
120 | **Demo**: 121 | ```python 122 | 123 | c.addFilesByWildCard("testfiles/test_python*.py") 124 | # This will add all the files having names like, test_python2, test_python5 etc. 125 | 126 | ``` 127 | 128 | ### 5. addFile() 129 | **Parameters** : `String`
130 | **Return Type** : `None`
131 | **Description**: Add a single file for submission.
132 | **Demo**: 133 | ```python 134 | 135 | c.addFile("testfiles/test_python.py") 136 | 137 | ``` 138 | 139 | ### 6. addBaseCode() 140 | **Parameters** : `String`
141 | **Return Type** : `None`
142 | **Description**: Add an allowed code file which is used by Moss to ignore results matching this file.
143 | **Demo**: 144 | ```python 145 | 146 | c.addBaseCode("/base.py") 147 | 148 | ``` 149 | 150 | - Moss normally reports all code 151 | that matches in pairs of files. When a base file is supplied, 152 | program code that also appears in the base file is not counted in matches. 153 | - A typical base file will include, for example, the instructor-supplied 154 | code for an assignment. Multiple Base files are allowed. 155 | - You should use a base file if it is convenient; base files improve results, but are not usually necessary for obtaining useful information. 156 | 157 |
158 | 159 | ### share_scores() 160 | **Parameters** : `Moss Results`(returned by `getResults()`)
161 | **Return Type** : `Dict`
162 | **Description**: Share Score is a utility which returns frequency of every individual file.
163 | **Demo**: 164 | ```python 165 | 166 | print(share_scores(moss_data)) 167 | 168 | # Will return 169 | """ 170 | {'testfiles/test_python.py': 2, 171 | 'testfiles/test_python2.py': 2, 172 | 'testfiles/test_python3.py': 2} 173 | """ 174 | ``` 175 | Share Score is basically the frequency of each file appearing in Moss Results. 176 | i.e Higher the frequency, the more is that solution "shared" by different files. 177 | 178 | ### insights() 179 | **Parameters** : `Moss Results`(returned by `getResults()`)
180 | **Return Type** : `Dict`
181 | **Description**: See [Insights](/insights).
182 | **Demo**: 183 | ```python 184 | 185 | print(insights(moss_data)) 186 | 187 | ``` -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: PlagCheck 2 | site_url: https://codeclassroom.github.io/PlagCheck/ 3 | repo_url: https://github.com/codeclassroom/PlagCheck 4 | site_author: Bhupesh Varshney 5 | site_description: PlagCheck v0.3 Documentation 6 | copyright: © 2019, Bhupesh Varshney 7 | nav: 8 | - Documentation: index.md 9 | - Installation: installation.md 10 | - Usage: usage.md 11 | - PlagCheck Insights: insights.md 12 | - Moss: moss.md 13 | - Changelog: changelog.md 14 | - About: about.md 15 | theme: readthedocs 16 | markdown_extensions: 17 | - toc: 18 | permalink: "#" -------------------------------------------------------------------------------- /plagcheck/__init__.py: -------------------------------------------------------------------------------- 1 | """The MOSS interface package for CodeClassroom""" 2 | from plagcheck.plagcheck import check, insights, share_scores 3 | -------------------------------------------------------------------------------- /plagcheck/analyze.py: -------------------------------------------------------------------------------- 1 | """ 2 | analyze.py 3 | ==================================== 4 | The Analysis Module for Moss 5 | (For info. 
on how this works contact varshneybhupesh@gmail.com) 6 | """ 7 | 8 | 9 | class Node: 10 | """A Single Submitted file""" 11 | 12 | def __init__(self, name): 13 | self.name = name 14 | self.tag = None 15 | self.links = [] 16 | 17 | def pointTo(self): 18 | """Return all nodes a node points to""" 19 | return [link.name for link in self.links] 20 | 21 | 22 | class Mgroups: 23 | """A Disconnected directed graph consisting all individual solutions""" 24 | 25 | linkCount = 0 26 | 27 | def __init__(self): 28 | self.nodes = [] 29 | self.nodeCount = 0 30 | 31 | def relate(self, P1, P2, node1, node2): 32 | """Set a path between two file nodes""" 33 | node_obj_dict = {} 34 | 35 | for r in self.nodes: 36 | node_obj_dict[r.name] = r 37 | 38 | if node1 in node_obj_dict.keys() and node2 in node_obj_dict.keys(): 39 | if P1 < P2: 40 | node_obj_dict[node1].links.append(node_obj_dict[node2]) 41 | elif P1 > P2: 42 | node_obj_dict[node2].links.append(node_obj_dict[node1]) 43 | else: 44 | node_obj_dict[node1].links.append(node_obj_dict[node2]) 45 | node_obj_dict[node2].links.append(node_obj_dict[node1]) 46 | 47 | Mgroups.linkCount += 1 48 | 49 | def __indegree(self, node: Node): 50 | indegree_count = 0 51 | for n in self.nodes: 52 | for link in n.pointTo(): 53 | if link == node.name: 54 | indegree_count += 1 55 | return indegree_count 56 | 57 | def __outdegree(self, node: Node): 58 | return len(node.links) 59 | 60 | def set_tags(self): 61 | """Assign appropriate tag to a Node""" 62 | for node in self.nodes: 63 | in_degree = self.__indegree(node) 64 | out_degree = self.__outdegree(node) 65 | 66 | if in_degree == 0 and out_degree > 0: 67 | node.tag = "D" 68 | elif in_degree > 0 and out_degree == 0: 69 | node.tag = "C" 70 | elif in_degree != 0 and out_degree != 0: 71 | node.tag = "DC" 72 | 73 | def createNodes(self, node_set: set): 74 | """Create multiple nodes at the same time""" 75 | for n in node_set: 76 | self.addNode(n) 77 | 78 | def addNode(self, name: str): 79 | """Add a single node 
to graph""" 80 | if name not in [r.name for r in self.nodes]: 81 | node = Node(name) 82 | self.nodes.append(node) 83 | self.nodeCount += 1 84 | return node 85 | 86 | def displayNodes(self): 87 | """Return all nodes in the graph""" 88 | return [r.name for r in self.nodes] 89 | 90 | def displayTags(self): 91 | """Display Nodes with their Tags""" 92 | for node in self.nodes: 93 | print("{}, tag = {}".format(node.name, node.tag)) 94 | 95 | def d2c(self): 96 | """All Direct Distributor to Culprit paths""" 97 | paths = [] 98 | for node in self.nodes: 99 | for link in node.links: 100 | if node.tag == "D" and link.tag == "C": 101 | paths.append(tuple((node.name, link.name))) 102 | return paths 103 | 104 | def d2dc(self): 105 | """All Direct Distributor to potential Distributor-Culprit paths""" 106 | paths = [] 107 | for node in self.nodes: 108 | for link in node.links: 109 | if node.tag == "D" and link.tag == "DC": 110 | paths.append(tuple((node.name, link.name))) 111 | return paths 112 | 113 | def dc2c(self): 114 | """All potential Distributor-Culprit to direct Culprit paths""" 115 | paths = [] 116 | for node in self.nodes: 117 | for link in node.links: 118 | if node.tag == "DC" and link.tag == "C": 119 | paths.append(tuple((node.name, link.name))) 120 | return paths 121 | 122 | def __repr__(self): 123 | """Pretty prints the graph""" 124 | paths = "" 125 | for node in self.nodes: 126 | for link in node.links: 127 | paths += "{0} --> {1}\n".format(node.name, link.name) 128 | return paths 129 | -------------------------------------------------------------------------------- /plagcheck/plagcheck.py: -------------------------------------------------------------------------------- 1 | """The MOSS interface package for CodeClassroom""" 2 | import collections 3 | import re 4 | import urllib.request 5 | from concurrent.futures import ThreadPoolExecutor 6 | from typing import List, Tuple 7 | 8 | import mosspy 9 | from bs4 import BeautifulSoup as bs 10 | 11 | from plagcheck.analyze 
HEADERS = {"User-Agent": "Mozilla/5.0"}


class Result(dict):
    """Typing for moss results: one row of the MOSS results table.

    The annotated names are the keys that ``check`` stores for every match
    when it scrapes the report.
    """

    file1: str
    file2: str
    percentage_file1: int
    percentage_file2: int
    no_of_lines_matched: int
    lines_matched: List[List[str]]


Results = List[Result]


def perc_str_to_int(string: str) -> int:
    """Convert string like "(42%)" to 42.

    Raises:
        ValueError: if *string* does not end with a parenthesised percentage.
    """
    match = re.search(r"\((\d+)%\)$", string)
    if match:
        return int(match.group(1))
    raise ValueError("Cannot find percentage in table")


def request(url: str) -> str:
    """Request a webpage and return its body decoded as UTF-8."""
    req = urllib.request.Request(url, headers=HEADERS)
    with urllib.request.urlopen(req) as response:
        body = response.read()

    return body.decode("utf-8")


def share_scores(moss_data: Results) -> dict:
    """Share Score Insights.

    Returns:
        A dict mapping each file name to the number of results in which it
        appears, as either ``file1`` or ``file2``.
    """
    share_score = collections.Counter()
    for result in moss_data:
        share_score.update((result["file1"], result["file2"]))

    return dict(share_score)


def insights(moss_data: Results) -> dict:
    """Analysis for Moss.

    Builds an ``Mgroups`` graph from the results and returns its three path
    listings under the keys ``"DtoC Paths"``, ``"DtoDC Paths"`` and
    ``"DCtoC Paths"``. How nodes get tagged is decided inside ``Mgroups``.
    """
    mg = Mgroups()
    similar_code_files = set()

    for r in moss_data:
        similar_code_files.add(r["file1"])
        similar_code_files.add(r["file2"])

    mg.createNodes(similar_code_files)

    for r in moss_data:
        mg.relate(
            r["percentage_file1"], r["percentage_file2"], r["file1"], r["file2"]
        )

    mg.set_tags()

    return {
        "DtoC Paths": mg.d2c(),
        "DtoDC Paths": mg.d2dc(),
        "DCtoC Paths": mg.dc2c(),
    }


class check:
    """Submit program files to MOSS and scrape the report it produces.

    Files are added with ``addFile`` / ``addFilesByWildCard`` /
    ``addBaseCode``; ``submit`` sends them and fills ``home_url`` and
    ``moss_results``, which are not set before that call.

    Args:
        - Language (str), must be one of ``mosspy.Moss.languages``
        - Moss User ID (str)

    Raises:
        ValueError: if the language is not supported.
    """

    def __init__(self, lang: str, user_id: str):

        self.__user_id = user_id
        languages = mosspy.Moss.languages
        if lang not in languages:
            raise ValueError(f"{lang} is not a supported language {languages}")
        self.lang = lang
        self.__moss = mosspy.Moss(self.__user_id, self.lang)

    @staticmethod
    def _split_cell(text: str) -> Tuple[str, int]:
        """Split a table cell such as ``"a.py (42%)"`` into name and percentage.

        The percentage is the trailing whitespace-separated token, so the
        split is done from the right; file names containing spaces stay
        intact.

        Raises:
            ValueError: if the cell has no separate trailing token or that
                token is not a parenthesised percentage.
        """
        filename, percentage = text.strip().rsplit(None, 1)
        return filename, perc_str_to_int(percentage)

    def __extract_info(self) -> Results:
        """Scrape the webpage for file names, percentage match etc."""
        response = request(self.home_url)

        html = bs(response, "lxml")
        table = html.find("table")

        # First pass: parse every row after the first <tr>, which is skipped.
        rows = []
        for row in table.find_all("tr")[1:]:
            col1, col2, col3 = row.find_all("td")
            filename1, perc1 = self._split_cell(col1.text)
            filename2, perc2 = self._split_cell(col2.text)
            rows.append(
                (
                    filename1,
                    filename2,
                    perc1,
                    perc2,
                    int(col3.text.strip()),
                    col1.a.get("href"),
                )
            )

        # Second pass: fetch all per-match pages through one shared pool.
        # ``map`` yields results in input order, so they line up with ``rows``.
        with ThreadPoolExecutor() as executor:
            matched = list(
                executor.map(self.__get_line_numbers, [row[5] for row in rows])
            )

        return [
            Result(
                file1=filename1,
                file2=filename2,
                percentage_file1=perc1,
                percentage_file2=perc2,
                no_of_lines_matched=no_of_lines,
                lines_matched=lines,
            )
            for (filename1, filename2, perc1, perc2, no_of_lines, _), lines in zip(
                rows, matched
            )
        ]

    def __get_line_numbers(self, url: str) -> List[List[str]]:
        """Get Line Numbers which are same.

        Returns:
            One list per table row of the match's ``-top.html`` page, holding
            the non-empty cell texts of that row.
        """
        # Escape the dot so only a literal ".html" suffix is rewritten.
        result_page = re.sub(r"\.html$", "-top.html", url)

        response = request(result_page)

        html = bs(response, "lxml")
        table = html.find("table")

        list_of_line_nos: List[List[str]] = []
        for row in table.find_all("tr")[1:]:
            cells = (col.text.strip() for col in row.find_all("td"))
            list_of_line_nos.append([line_nos for line_nos in cells if line_nos])
        return list_of_line_nos

    def addFilesByWildCard(self, files):
        """Add multiple files"""
        self.__moss.addFilesByWildcard(files)

    def addFile(self, file):
        """Add a single file for submission"""
        self.__moss.addFile(file)

    def addBaseCode(self, base_file: str):
        """Add basefile"""
        self.__moss.addBaseFile(base_file)

    def submit(self):
        """Submit files to the Moss Server and scrape the returned report."""
        self.home_url = self.__moss.send()
        self.moss_results = self.__extract_info()

    def getHomePage(self) -> str:
        """Return Moss Results HomePage URL"""
        return self.home_url

    def getResults(self) -> Results:
        """Return the results as a list of dictionaries"""
        return self.moss_results
assert result == 42 43 | result = plagcheck.perc_str_to_int("(100%)") 44 | assert result == 100 45 | 46 | 47 | def test_Mgroups(): 48 | """Test Mgroups()""" 49 | mg = analyze.Mgroups() 50 | mg.createNodes({"1", "2", "3"}) 51 | 52 | mg.relate(45, 88, "3", "1") 53 | mg.relate(46, 90, "3", "2") 54 | 55 | mg.set_tags() 56 | 57 | assert mg.d2dc() == [] 58 | assert mg.d2c() == [("3", "1"), ("3", "2")] 59 | assert mg.dc2c() == [] 60 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | mosspy 2 | beautifulsoup4 3 | lxml 4 | python-dotenv 5 | pytest -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="plagcheck", 8 | version="0.4", 9 | license="MIT", 10 | author="Bhupesh Varshney", 11 | author_email="varshneybhupesh@gmail.com", 12 | description="Moss Results scraper with powerful insights & analysis", 13 | keywords="moss plagiarism analysis cheat mosspy", 14 | long_description=long_description, 15 | long_description_content_type="text/markdown", 16 | url="https://codeclassroom.github.io/PlagCheck/", 17 | project_urls={ 18 | "Documentation": "https://plagcheck.readthedocs.io/en/latest/?badge=latest", 19 | "Source Code": "https://github.com/codeclassroom/PlagCheck", 20 | "Funding": "https://www.patreon.com/bePatron?u=18082750", 21 | "Say Thanks!": "https://github.com/codeclassroom/PlagCheck/issues/new?assignees=&labels=&template=---say-thank-you.md&title=", 22 | "Tracker": "https://github.com/codeclassroom/PlagCheck/issues", 23 | }, 24 | packages=setuptools.find_packages(), 25 | install_requires=[ 26 | 'mosspy', 27 | 'beautifulsoup4', 28 | 'lxml', 29 | ], 30 | classifiers=[ 31 | "Programming 
Language :: Python :: 3.6", 32 | "Programming Language :: Python :: 3.7", 33 | "Programming Language :: Python :: 3.8", 34 | "License :: OSI Approved :: MIT License", 35 | 'Topic :: Software Development :: Build Tools', 36 | "Topic :: Education", 37 | "Topic :: Education", 38 | "Topic :: Software Development", 39 | "Topic :: Software Development :: Libraries", 40 | "Topic :: Software Development :: Libraries :: Python Modules", 41 | "Operating System :: OS Independent", 42 | 43 | ], 44 | python_requires='>=3.6', 45 | ) -------------------------------------------------------------------------------- /testfiles/test_java.java: -------------------------------------------------------------------------------- 1 | class FibonacciExample1{ 2 | public static void main(String args[]){ 3 | int n1=0,n2=1,n3,i,count=10; 4 | System.out.print(n1+" "+n2);//printing 0 and 1 5 | 6 | for(i=2;i