├── .all-contributorsrc
├── .github
├── FUNDING.yml
└── ISSUE_TEMPLATE
│ ├── ----bug-report.md
│ ├── ---feature-request.md
│ └── ---say-thank-you.md
├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── _config.yml
├── demo.py
├── docs
├── about.md
├── changelog.md
├── index.md
├── insights.md
├── installation.md
├── moss.md
└── usage.md
├── mkdocs.yml
├── plagcheck
├── __init__.py
├── analyze.py
├── plagcheck.py
└── plagcheck_test.py
├── requirements-dev.txt
├── setup.py
└── testfiles
├── test_java.java
├── test_java2.java
├── test_java3.java
├── test_java4.java
├── test_java5.java
├── test_java6.java
├── test_java7.java
├── test_python.py
├── test_python2.py
└── test_python3.py
/.all-contributorsrc:
--------------------------------------------------------------------------------
1 | {
2 | "files": [
3 | "README.md"
4 | ],
5 | "imageSize": 100,
6 | "commit": false,
7 | "contributors": [
8 | {
9 | "login": "vhsw",
10 | "name": "Alexey Dubrov",
11 | "avatar_url": "https://avatars3.githubusercontent.com/u/7099976?v=4",
12 | "profile": "https://github.com/vhsw",
13 | "contributions": [
14 | "code",
15 | "bug",
16 | "test"
17 | ]
18 | }
19 | ],
20 | "contributorsPerLine": 7,
21 | "projectName": "PlagCheck",
22 | "projectOwner": "codeclassroom",
23 | "repoType": "github",
24 | "repoHost": "https://github.com"
25 | }
26 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: bhupesh
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
13 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/----bug-report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "\U0001F41B Bug report"
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: 'bug'
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 |
26 | **Desktop (please complete the following information):**
27 | - OS: [e.g. iOS]
28 | - Browser [e.g. chrome, safari]
29 | - Version [e.g. 22]
30 |
31 | **Additional context**
32 | Add any other context about the problem here.
33 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/---feature-request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "\U0001F680 Feature request"
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: 'enhancement'
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/---say-thank-you.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "\U0001F49F Say thank you"
3 | about: Just say thanks if you liked PlagCheck
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | If you liked **PlagCheck**, please let us know. We'd love to hear from you!
11 |
12 | You can help me in any way possible
13 |
14 | - [ ] Give the repository a star ⭐️.
15 | - [ ] Help out with issues.
16 | - [ ] Share it with others.
17 | - [ ] Support me on [Patreon](https://www.patreon.com/bePatron?u=18082750).
18 |
19 | Thank you! 💐
20 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
106 | # custom
107 | /submission
108 | sample.py
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - "3.6"
4 | - "3.7"
5 | - "3.8"
6 |
7 | install:
8 | - pip install -r requirements-dev.txt
9 | - pip install isort black flake8 pylint
10 | - pip install coveralls
11 | script:
12 | - pytest plagcheck
13 | - py.test plagcheck/plagcheck_test.py
14 | - isort --check-only --recursive plagcheck
15 | - black --check --diff plagcheck
16 | - flake8 plagcheck --max-line-length=88 --ignore=F401
17 | - pylint plagcheck --disable=bad-continuation,invalid-name,attribute-defined-outside-init,no-self-use,too-many-locals,too-few-public-methods
18 | after_success:
19 | - coveralls
20 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 |
4 | ## [0.4] - March 10, 2020
5 |
6 | ### Changed [⚠️ Breaking Changes]
7 | - `getShareScores` & `getInsights` have been decoupled from the check class, they now have to be imported separately.
8 | - Minor changes in the `analyze.py` module.
9 |
10 |
11 | ## [0.3] - Jan 1, 2020
12 |
13 | ### Added
14 |
15 | - New module `analyze.py` for Moss Results analysis
16 | - `getShareScores()` for returning frequency of shared files.
17 | - `addFile()` for adding files.
18 | - `addFilesByWildCard()` for submitting multiple files.
19 | - Support for adding base code using `addBaseCode()`.
20 |
21 | ### Changed
22 | - The plagcheck module is now more modularised. `check` is now a class.
23 | - `__get_line_numbers()` now runs in a new thread.
24 |
25 | ### Removed
26 | - `requests` as a dependency, network requests are now 50% faster.
27 |
28 |
29 | ## [0.2] - Nov 9, 2019
30 | - Minor Improvements
31 |
32 |
33 | ## [0.1] - Nov 3, 2019
34 | - Initial Release
35 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to PlagCheck
2 |
3 | :+1::tada: First off, thanks for taking the time to contribute! :tada::+1:
4 |
5 | Make sure you follow the guidelines below before contributing.
6 |
7 | 1. Raise an issue before sending any PR.
8 | 2. Make your changes on a `feature` branch.
9 | 3. See if there is already an open PR for the same issue.
10 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Bhupesh Varshney
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | global-exclude *_test.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PlagCheck ✅
2 |
3 | > Moss Results scraper with powerful insights & analysis 💡
4 |
5 | 
6 | [](https://travis-ci.org/codeclassroom/PlagCheck)
7 | 
8 | [](https://plagcheck.readthedocs.io/en/latest/?badge=latest)
9 | 
10 | 
11 |
12 |
13 | ## Installation
14 |
15 | Install using `pip` from PyPI
16 |
17 | ```bash
18 | pip install plagcheck
19 | ```
20 |
21 | or directly from GitHub if you cannot wait to test new features
22 |
23 | ```bash
24 | pip install git+https://github.com/codeclassroom/PlagCheck.git
25 | ```
26 |
27 | ## Usage
28 |
29 | ```python
30 |
31 | """Usage example"""
32 | import os
33 | import pprint
34 | from plagcheck.plagcheck import check, insights, share_scores
35 |
36 | from dotenv import load_dotenv
37 | load_dotenv()
38 |
39 | language = "java"
40 | userid = os.environ["USER_ID"]
41 |
42 |
43 | moss = check(language, userid)
44 |
45 | moss.addFilesByWildCard("testfiles/test_java*.java")
46 |
47 | # or moss.addFile("testfiles/test_python.py")
48 |
49 | moss.submit()
50 |
51 | print(moss.getHomePage())
52 |
53 | result = moss.getResults()
54 |
55 | pprint.pprint(result)
56 |
57 | # print potential distributor-culprit relationships
58 | pprint.pprint(insights(result))
59 | # print frequency of each shared solution
60 | pprint.pprint(share_scores(result))
61 |
62 | ```
63 |
64 | ## Documentation
65 |
66 | > [PlagCheck Documentation](https://plagcheck.readthedocs.io/en/latest/)
67 |
68 |
69 | ## Development
70 |
71 | ##### Prerequisites
72 | - Python 3.6+
73 | - virtualenv
74 |
75 | 1. Create virtual environment.
76 | ```bash
77 | virtualenv -p python3 venv && cd venv && source bin/activate
78 | ```
79 | 2. Clone the repository.
80 | ```bash
81 | git clone https://github.com/codeclassroom/PlagCheck.git
82 | ```
83 | 3. Install Dependencies.
84 | ```bash
85 | pip install -r requirements-dev.txt
86 | ```
87 | 4. Run tests.
88 | ```bash
89 | pytest plagcheck
90 | ```
91 | 5. Lint the project with
92 | ```bash
93 | flake8 plagcheck --max-line-length=88 --ignore=F401
94 | black --check --diff plagcheck
95 | ```
96 |
97 | ## 📝 Changelog
98 |
99 | See the [CHANGELOG.md](CHANGELOG.md) file for details.
100 |
101 |
102 | ## Author
103 |
104 | 👥 **Bhupesh Varshney**
105 |
106 | - Twitter: [@bhupeshimself](https://twitter.com/bhupeshimself)
107 | - DEV: [bhupesh](https://dev.to/bhupesh)
108 |
109 | [](https://forthebadge.com)
110 |
111 | ## 📜 License
112 |
113 | This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
114 |
115 | ## 👋 Contributing
116 |
117 | Please read the [CONTRIBUTING](CONTRIBUTING.md) guidelines for the process of submitting pull requests to us.
118 |
119 |
120 | ## Contributors ✨
121 |
122 | Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)):
123 |
124 |
125 |
126 |
131 |
132 |
133 |
134 | This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome!
135 |
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-merlot
2 | show_downloads: true
3 |
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
"""Usage example"""
import os
import pprint
from plagcheck.plagcheck import check, insights, share_scores

from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the environment.
load_dotenv()

language = "java"
# The Moss user id is read from the USER_ID environment variable;
# a KeyError is raised here when it is not set.
userid = os.environ["USER_ID"]


moss = check(language, userid)

# Queue every file matching the wildcard for submission.
moss.addFilesByWildCard("testfiles/test_java*.java")

# or moss.addFile("testfiles/test_python.py")

# Send the queued files to Moss.
moss.submit()

# URL of the Moss result home page.
print(moss.getHomePage())

# Scraped Moss results (a list of dictionaries, one per matched file pair).
result = moss.getResults()

pprint.pprint(result)

# print potential distributor-culprit relationships
pprint.pprint(insights(result))
# print frequency of each shared solution
pprint.pprint(share_scores(result))
31 |
--------------------------------------------------------------------------------
/docs/about.md:
--------------------------------------------------------------------------------
1 | ## About
2 |
3 | plagcheck was built by :
4 |
5 | 👥 **Bhupesh Varshney**
6 |
7 | - Twitter: [@bhupeshimself](https://twitter.com/bhupeshimself)
8 | - DEV: [bhupesh](https://dev.to/bhupesh)
9 | - GitHub: [Bhupesh-V](https://github.com/Bhupesh-V)
10 |
11 | ## 📝 License
12 |
13 | This project is licensed under the MIT License. See the [LICENSE](https://github.com/codeclassroom/PlagCheck/blob/master/LICENSE) file for details.
14 |
15 | ## 👋 Contributing
16 |
17 | Please read the [CONTRIBUTING](https://github.com/codeclassroom/PlagCheck/blob/master/CONTRIBUTING.md) guidelines for the process of submitting pull requests to us.
--------------------------------------------------------------------------------
/docs/changelog.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 |
4 | ## [0.4] - March 10, 2020
5 |
6 | ### Changed [⚠️ Breaking Changes]
7 | - `getShareScores` & `getInsights` have been decoupled from the check class, they now have to be imported separately.
8 | - Minor changes in the `analyze.py` module.
9 |
10 |
11 | ## [0.3] - Jan 1, 2020
12 |
13 | ### Added
14 |
15 | - New module `analyze.py` for Moss Results analysis
16 | - `getShareScores()` for returning frequency of shared files.
17 | - `addFile()` for adding files.
18 | - `addFilesByWildCard()` for submitting multiple files.
19 | - Support for adding base code using `addBaseCode()`.
20 |
21 | ### Changed
22 | - The plagcheck module is now more modularised. `check` is now a class.
23 | - `__get_line_numbers()` now runs in a new thread.
24 |
25 | ### Removed
26 | - `requests` as a dependency, network requests are now 50% faster.
27 |
28 |
29 | ## [0.2] - Nov 9, 2019
30 | - Minor Improvements
31 |
32 |
33 | ## [0.1] - Nov 3, 2019
34 | - Initial Release
35 |
36 | # Releases
37 | See releases on [PyPi](https://pypi.org/project/plagcheck/#history)
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # PlagCheck ✅
2 |
3 | > Moss Results scraper with powerful insights & analysis 💡
4 |
5 | 
6 | [](https://travis-ci.org/codeclassroom/PlagCheck)
7 | 
8 | [](https://plagcheck.readthedocs.io/en/latest/?badge=latest)
9 | 
10 | 
--------------------------------------------------------------------------------
/docs/insights.md:
--------------------------------------------------------------------------------
1 | # Insights
2 |
3 | PlagCheck provides algorithmic analysis of Moss results.
4 |
5 | ### Terminologies
6 |
7 | ### 1. Node
8 | Nodes are results returned by Moss, i.e. every
9 | individual file.
10 |
11 | ### 2. Tags
12 | Tags are roles which a file serves i.e. a tag is
13 | a potential distributor or potential culprit or
14 | both.
15 |
16 | ### 3. M-group
17 | m-groups (moss-groups) are groups of solutions which have similar code.
18 | For example, a student who solves a programming problem may share their
19 | solution with 3 of their friends; that is a single m-group with 4 nodes.
20 |
21 | For example if you run [demo.py](https://github.com/codeclassroom/PlagCheck/blob/master/demo.py), `insights()` will return the following data:
22 | ```python
23 |
24 | {'DCtoC Paths': [('testfiles/test_java5.java', 'testfiles/test_java2.java'),
25 | ('testfiles/test_java4.java', 'testfiles/test_java2.java')],
26 | 'DtoC Paths': [('testfiles/test_java3.java', 'testfiles/test_java2.java'),
27 | ('testfiles/test_java3.java', 'testfiles/test_java.java'),
28 | ('testfiles/test_java7.java', 'testfiles/test_java6.java')],
29 | 'DtoDC Paths': [('testfiles/test_java3.java', 'testfiles/test_java5.java'),
30 | ('testfiles/test_java3.java', 'testfiles/test_java4.java')]}
31 |
32 | ```
33 |
34 | This analysis can be visualized as the following _Disconnected Directed Graph_:
35 |
36 | 
37 |
38 | We assign Tags to every individual Node.
39 |
40 | 1. D - Distributor
41 | Student(s) who distributed their
42 | code in a group.
43 | 2. C - Culprit
44 | Student(s) who copied the shared
45 | code.
46 | 3. DC - Both a Distributor & Culprit
47 |
48 | In the above depicted graph, there are 2 unique _m-groups_.
49 |
50 | 1. Group 1 : [1, 2, 3, 4, 5]
51 | 2. Group 2 : [7, 6]
--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
1 | # Installation
2 |
3 | Installing plagcheck is pretty simple, just run
4 |
5 | ```bash
6 | pip install plagcheck
7 | ```
8 |
9 | Install a specific version
10 |
11 | ```bash
12 | pip install plagcheck==0.4
13 | ```
14 |
15 | or directly from GitHub if you cannot wait to test new features
16 |
17 | ```bash
18 | pip install git+https://github.com/codeclassroom/PlagCheck.git
19 | ```
20 |
21 | If you have an old version, update it using
22 |
23 | ```bash
24 | pip install --upgrade plagcheck
25 | ```
--------------------------------------------------------------------------------
/docs/moss.md:
--------------------------------------------------------------------------------
1 | ## Moss
2 | > Derived from [Reading the Results](http://moss.stanford.edu/general/format.html).
3 |
4 |
5 |
6 | ## [Tips](http://moss.stanford.edu/general/tips.html)
7 |
8 | - Moss is quite conservative about what it considers to be matching passages of code. If Moss says that two passages look alike, then they almost certainly look quite alike. Moss also excludes all code that appears in too many of the submitted programs. Thus, all matches reported by Moss fairly accurately approximate the signature of plagiarized code: a passage of similar code in two programs that does not also appear in very many other programs.
9 |
10 | - False positives are possible with Moss, as programs may legitimately share code (e.g., two programs making use of the same public-domain library). The higher-ranked pairs are more likely to be the result of plagiarism than the lower-ranked pairs. The recommended strategy is to start with the highest-ranked pair and work down the list until one finds that a large fraction of the reported matches are false positives.
11 |
12 | - Moss can be more accurate if a base file is supplied. The -b option to Moss supplies a base file of code that should be ignored if it appears in programs; Moss never considers code that appears in a base file to match any other code. If your results include many unintended matches, then it is best to place all legitimately shared code in a base file (e.g., instructor-supplied code, common libraries, etc.) and resubmit the query to the server.
13 |
14 | - Moss detects structural similarities in programs and nothing more; it has no idea why programs may be structurally similar. As noted above, there are reasons besides plagiarism that two programs may appear the same (e.g., they are both based on the same third program, such as instructor-supplied code for an assignment). Results from Moss cannot be taken as direct evidence of plagiarism---it is still necessary for someone to examine the programs and make a judgment.
15 |
16 | ## Credits
17 | Moss was written and is maintained by Alex Aiken, aiken@cs.stanford.edu.
18 |
19 | The HTML interface was conceived of and designed by Guido Malpohl (s_malpoh@ira.uka.de), the author of JPlag, a plagiarism detection system for Java programs.
20 |
21 | PlagCheck extracts information from the webpages for easier storing & analysis of results.
22 | Contact [varshneybhupesh@gmail.com](mailto:varshneybhupesh@gmail.com) for more info.
23 |
--------------------------------------------------------------------------------
/docs/usage.md:
--------------------------------------------------------------------------------
1 | # Usage
2 |
3 | plagcheck provides the following classes & methods:
4 |
5 | ### check(lang, user_id)
6 |
7 | * **Parameters** :
8 | - lang : The Programming Language.
9 | - user_id : Moss UserID.
10 | - Program files are not passed to the constructor; add them afterwards with `addFile()` or `addFilesByWildCard()`.
11 |
12 |
13 | **Demo**:
14 | ```python
15 |
16 | """Usage example"""
17 | import os
18 | import pprint
19 | from plagcheck.plagcheck import check, insights, share_scores
20 |
21 | from dotenv import load_dotenv
22 | load_dotenv()
23 |
24 | language = "java"
25 | userid = os.environ["USER_ID"]
26 |
27 |
28 | moss = check(language, userid)
29 |
30 | moss.addFilesByWildCard("testfiles/test_java*.java")
31 |
32 | # or moss.addFile("testfiles/test_python.py")
33 |
34 | moss.submit()
35 |
36 | print(moss.getHomePage())
37 |
38 | result = moss.getResults()
39 |
40 | pprint.pprint(result)
41 |
42 | # print potential distributor-culprit relationships
43 | pprint.pprint(insights(result))
44 | # print frequency of each shared solution
45 | pprint.pprint(share_scores(result))
46 |
47 | ```
48 |
49 | ### 1. submit()
50 | **Parameters** : `None`
51 | **Return Type** : `None`
52 | **Description**: Submits the program on Moss.
53 | **Demo**:
54 | ```python
55 |
56 | c.submit()
57 |
58 | ```
59 |
60 | ### 2. getHomePage()
61 | **Parameters** : `None`
62 | **Return Type** : `String`
63 | **Description**: Returns the Moss Result HomePage URL
64 | **Demo**:
65 | ```python
66 |
67 | c.getHomePage()
68 |
69 | ```
70 |
71 | ### 3. getResults()
72 | **Parameters** : `None`
73 | **Return Type** : `List`
74 | **Description**: Returns the scraped data from Moss Results.
75 | **Demo**:
76 | ```python
77 |
78 | c.getResults()
79 |
80 | ```
81 |
82 | *getResults()* returns the following list of dictionaries:
83 | ```json
84 | [
85 | {
86 | "file1":"filename1.py",
87 | "file2":"filename2.py",
88 | "percentage": 34,
89 | "no_of_lines_matched": 3,
90 | "lines_matched":[["2-3", "10-11"]]
91 | },
92 | ....
93 | ]
94 | ```
95 | Each dict item contains the following items:
96 |
97 | - **file1** & **file2** :
98 | The pair of file names that have similar code.
99 |
100 | - **percentage** :
101 | It is the percentage of the code in one file considered to match code in the other file.
102 |
103 | - **no_of_lines_matched** :
104 | Approximately the number of lines of code that matched between the 2 given files.
105 |
106 | - **lines_matched** :
107 | A list of lists indicating all line number matches between the 2 code files.
108 |
109 | For example, if ***lines_matched* is [['88-99','119-131']]**,
110 | then the line numbers 88-99 of *file1* matched with lines 119-131 of *file2*.
111 |
112 |
113 |
114 | > For both measures (*no_of_lines_matched* & *percentage*), higher numbers mean more code matches.
115 |
116 | ### 4. addFilesByWildCard()
117 | **Parameters** : `String`
118 | **Return Type** : `None`
119 | **Description**: Add multiple files.
120 | **Demo**:
121 | ```python
122 |
123 | c.addFilesByWildCard("testfiles/test_python*.py")
124 | # This will add all the files having names like, test_python2, test_python5 etc.
125 |
126 | ```
127 |
128 | ### 5. addFile()
129 | **Parameters** : `String`
130 | **Return Type** : `None`
131 | **Description**: Add a single file for submission.
132 | **Demo**:
133 | ```python
134 |
135 | c.addFile("testfiles/test_python.py")
136 |
137 | ```
138 |
139 | ### 6. addBaseCode()
140 | **Parameters** : `String`
141 | **Return Type** : `None`
142 | **Description**: Add an allowed code file which is used by Moss to ignore results matching this file.
143 | **Demo**:
144 | ```python
145 |
146 | c.addBaseCode("/base.py")
147 |
148 | ```
149 |
150 | - Moss normally reports all code
151 | that matches in pairs of files. When a base file is supplied,
152 | program code that also appears in the base file is not counted in matches.
153 | - A typical base file will include, for example, the instructor-supplied
154 | code for an assignment. Multiple Base files are allowed.
155 | - You should use a base file if it is convenient; base files improve results, but are not usually necessary for obtaining useful information.
156 |
157 |
158 |
159 | ### share_scores()
160 | **Parameters** : `Moss Results`(returned by `getResults()`)
161 | **Return Type** : `Dict`
162 | **Description**: Share Score is a utility which returns frequency of every individual file.
163 | **Demo**:
164 | ```python
165 |
166 | print(share_scores(moss_data))
167 |
168 | # Will return
169 | """
170 | {'testfiles/test_python.py': 2,
171 | 'testfiles/test_python2.py': 2,
172 | 'testfiles/test_python3.py': 2}
173 | """
174 | ```
175 | Share Score is basically the frequency of each file appearing in Moss Results,
176 | i.e. the higher the frequency, the more that solution is "shared" by different files.
177 |
178 | ### insights()
179 | **Parameters** : `Moss Results`(returned by `getResults()`)
180 | **Return Type** : `Dict`
181 | **Description**: See [Insights](/insights).
182 | **Demo**:
183 | ```python
184 |
185 | print(insights(moss_data))
186 |
187 | ```
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: PlagCheck
2 | site_url: https://codeclassroom.github.io/PlagCheck/
3 | repo_url: https://github.com/codeclassroom/PlagCheck
4 | site_author: Bhupesh Varshney
5 | site_description: PlagCheck v0.4 Documentation
6 | copyright: © 2019, Bhupesh Varshney
7 | nav:
8 | - Documentation: index.md
9 | - Installation: installation.md
10 | - Usage: usage.md
11 | - PlagCheck Insights: insights.md
12 | - Moss: moss.md
13 | - Changelog: changelog.md
14 | - About: about.md
15 | theme: readthedocs
16 | markdown_extensions:
17 | - toc:
18 | permalink: "#"
--------------------------------------------------------------------------------
/plagcheck/__init__.py:
--------------------------------------------------------------------------------
1 | """The MOSS interface package for CodeClassroom"""
2 | from plagcheck.plagcheck import check, insights, share_scores
3 |
--------------------------------------------------------------------------------
/plagcheck/analyze.py:
--------------------------------------------------------------------------------
1 | """
2 | analyze.py
3 | ====================================
4 | The Analysis Module for Moss
5 | (For info. on how this works contact varshneybhupesh@gmail.com)
6 | """
7 |
8 |
class Node:
    """One submitted file, modelled as a vertex of the graph."""

    def __init__(self, name):
        # ``tag`` is initialised to None; ``links`` collects the Node objects
        # this node points to.
        self.name, self.tag, self.links = name, None, []

    def pointTo(self):
        """List the names of all nodes this node links to, in link order."""
        names = []
        for target in self.links:
            names.append(target.name)
        return names
20 |
21 |
22 | class Mgroups:
23 | """A Disconnected directed graph consisting all individual solutions"""
24 |
25 | linkCount = 0
26 |
27 | def __init__(self):
28 | self.nodes = []
29 | self.nodeCount = 0
30 |
31 | def relate(self, P1, P2, node1, node2):
32 | """Set a path between two file nodes"""
33 | node_obj_dict = {}
34 |
35 | for r in self.nodes:
36 | node_obj_dict[r.name] = r
37 |
38 | if node1 in node_obj_dict.keys() and node2 in node_obj_dict.keys():
39 | if P1 < P2:
40 | node_obj_dict[node1].links.append(node_obj_dict[node2])
41 | elif P1 > P2:
42 | node_obj_dict[node2].links.append(node_obj_dict[node1])
43 | else:
44 | node_obj_dict[node1].links.append(node_obj_dict[node2])
45 | node_obj_dict[node2].links.append(node_obj_dict[node1])
46 |
47 | Mgroups.linkCount += 1
48 |
49 | def __indegree(self, node: Node):
50 | indegree_count = 0
51 | for n in self.nodes:
52 | for link in n.pointTo():
53 | if link == node.name:
54 | indegree_count += 1
55 | return indegree_count
56 |
57 | def __outdegree(self, node: Node):
58 | return len(node.links)
59 |
60 | def set_tags(self):
61 | """Assign appropriate tag to a Node"""
62 | for node in self.nodes:
63 | in_degree = self.__indegree(node)
64 | out_degree = self.__outdegree(node)
65 |
66 | if in_degree == 0 and out_degree > 0:
67 | node.tag = "D"
68 | elif in_degree > 0 and out_degree == 0:
69 | node.tag = "C"
70 | elif in_degree != 0 and out_degree != 0:
71 | node.tag = "DC"
72 |
73 | def createNodes(self, node_set: set):
74 | """Create multiple nodes at the same time"""
75 | for n in node_set:
76 | self.addNode(n)
77 |
78 | def addNode(self, name: str):
79 | """Add a single node to graph"""
80 | if name not in [r.name for r in self.nodes]:
81 | node = Node(name)
82 | self.nodes.append(node)
83 | self.nodeCount += 1
84 | return node
85 |
86 | def displayNodes(self):
87 | """Return all nodes in the graph"""
88 | return [r.name for r in self.nodes]
89 |
90 | def displayTags(self):
91 | """Display Nodes with their Tags"""
92 | for node in self.nodes:
93 | print("{}, tag = {}".format(node.name, node.tag))
94 |
def d2c(self):
    """Return ``(source, target)`` name pairs for links from a "D" node to a "C" node."""
    return [
        (source.name, target.name)
        for source in self.nodes
        for target in source.links
        if source.tag == "D" and target.tag == "C"
    ]
103 |
def d2dc(self):
    """Return ``(source, target)`` name pairs for links from a "D" node to a "DC" node."""
    return [
        (source.name, target.name)
        for source in self.nodes
        for target in source.links
        if source.tag == "D" and target.tag == "DC"
    ]
112 |
def dc2c(self):
    """Return ``(source, target)`` name pairs for links from a "DC" node to a "C" node."""
    return [
        (source.name, target.name)
        for source in self.nodes
        for target in source.links
        if source.tag == "DC" and target.tag == "C"
    ]
121 |
def __repr__(self):
    """Render every link as a ``<source> --> <target>`` line, one per link."""
    return "".join(
        "{0} --> {1}\n".format(source.name, target.name)
        for source in self.nodes
        for target in source.links
    )
129 |
--------------------------------------------------------------------------------
/plagcheck/plagcheck.py:
--------------------------------------------------------------------------------
1 | """The MOSS interface package for CodeClassroom"""
2 | import collections
3 | import re
4 | import urllib.request
5 | from concurrent.futures import ThreadPoolExecutor
6 | from typing import List, Tuple
7 |
8 | import mosspy
9 | from bs4 import BeautifulSoup as bs
10 |
11 | from plagcheck.analyze import Mgroups
12 |
# Headers attached to the page requests built by `request`.
HEADERS = {"User-Agent": "Mozilla/5.0"}
14 |
15 |
class Result(dict):
    """Typing for moss results.

    A plain ``dict`` subclass; the annotations name the keys stored for each
    pair of matched files.
    """

    file1: str  # name of the first file in the pair
    file2: str  # name of the second file in the pair
    percentage_file1: int  # match percentage reported for file1
    percentage_file2: int  # match percentage reported for file2
    no_of_lines_matched: int
    lines_matched: List[List[str]]


# A full report: one Result per matched pair.
Results = List[Result]
27 |
28 |
def perc_str_to_int(string: str) -> int:
    """Extract the integer from a "(N%)" marker at the end of ``string``.

    Example: "(42%)" -> 42.

    Raises:
        ValueError: if no such marker is found.
    """
    found = re.search(r"\((\d+)%\)$", string)
    if found is None:
        raise ValueError("Cannot find percentage in table")
    return int(found.group(1))
35 |
36 |
def request(url: str):
    """Fetch ``url`` using the module-level ``HEADERS`` and return the body as text.

    The response bytes are decoded as UTF-8.
    """
    page_request = urllib.request.Request(url, headers=HEADERS)
    with urllib.request.urlopen(page_request) as response:
        payload = response.read()

    return payload.decode("utf-8")
44 |
45 |
def share_scores(moss_data: dict) -> dict:
    """Count how often each file name occurs across the result entries.

    Every entry contributes its "file1" and "file2" values; the returned dict
    maps each file name to the number of times it was seen.
    """
    appearances = collections.Counter()
    for entry in moss_data:
        appearances.update((entry["file1"], entry["file2"]))
    return dict(appearances)
57 |
58 |
def insights(moss_data: dict) -> dict:
    """Build an ``Mgroups`` graph from the results and report its tagged paths.

    One node is created per distinct file name, each result entry is related
    using its two percentages, the nodes are tagged, and the three path lists
    are returned under the keys "DtoC Paths", "DtoDC Paths" and "DCtoC Paths".
    """
    graph = Mgroups()

    file_names = {
        name for entry in moss_data for name in (entry["file1"], entry["file2"])
    }
    graph.createNodes(file_names)

    for entry in moss_data:
        graph.relate(
            entry["percentage_file1"],
            entry["percentage_file2"],
            entry["file1"],
            entry["file2"],
        )

    graph.set_tags()

    return {
        "DtoC Paths": graph.d2c(),
        "DtoDC Paths": graph.d2dc(),
        "DCtoC Paths": graph.dc2c(),
    }
83 |
84 |
class check:
    """Submit source files to MOSS and scrape the resulting report.

    Args:
        lang: language of the submitted files; must be listed in
            ``mosspy.Moss.languages``.
        user_id: Moss user id.

    Raises:
        ValueError: if ``lang`` is not a supported language.
    """

    def __init__(self, lang: str, user_id: str):
        self.__user_id = user_id
        languages = mosspy.Moss.languages
        if lang not in languages:
            raise ValueError(f"{lang} is not a supported language {languages}")
        self.lang = lang
        self.__moss = mosspy.Moss(self.__user_id, self.lang)
        # Populated by submit(); defined up front so the getters never fail
        # with AttributeError when called before a submission.
        self.home_url = None
        self.moss_results: Results = []

    def __extract_info(self) -> Results:
        """Scrape the report home page for file names, percentages and matched lines.

        Every table row after the header is parsed first; the per-pair detail
        pages are then fetched through one shared thread pool so the requests
        actually run concurrently.
        """
        response = request(self.home_url)

        html = bs(response, "lxml")
        table = html.find("table")

        rows = []
        detail_urls = []
        for row in table.find_all("tr")[1:]:
            col1, col2, col3 = row.find_all("td")
            # The percentage is the last whitespace-separated token; splitting
            # from the right keeps file names that contain spaces intact.
            filename1, perc1 = col1.text.strip().rsplit(None, 1)
            filename2, perc2 = col2.text.strip().rsplit(None, 1)
            rows.append(
                (
                    filename1,
                    filename2,
                    perc_str_to_int(perc1),
                    perc_str_to_int(perc2),
                    int(col3.text.strip()),
                )
            )
            detail_urls.append(col1.a.get("href"))

        # map() yields results in submission order, so they line up with rows.
        with ThreadPoolExecutor() as executor:
            all_lines = list(executor.map(self.__get_line_numbers, detail_urls))

        results: Results = []
        for (filename1, filename2, perc1, perc2, matched), lines in zip(
            rows, all_lines
        ):
            results.append(
                Result(
                    file1=filename1,
                    file2=filename2,
                    percentage_file1=perc1,
                    percentage_file2=perc2,
                    no_of_lines_matched=matched,
                    lines_matched=lines,
                )
            )
        return results

    def __get_line_numbers(self, url: str) -> List[List[str]]:
        """Get the matched line ranges listed on a pair's "-top" page.

        Returns one list per table row after the header, holding the
        non-empty cell texts of that row.
        """
        list_of_line_nos: List[List[str]] = []
        # Escape the dot so only a literal ".html" suffix is rewritten.
        result_page = re.sub(r"\.html$", "-top.html", url)

        response = request(result_page)

        html = bs(response, "lxml")
        table = html.find("table")
        for row in table.find_all("tr")[1:]:
            matched_lines: List[str] = []
            for col in row.find_all("td"):
                line_nos: str = col.text.strip()
                if line_nos:
                    matched_lines.append(line_nos)
            list_of_line_nos.append(matched_lines)
        return list_of_line_nos

    def addFilesByWildCard(self, files):
        """Add multiple files"""
        self.__moss.addFilesByWildcard(files)

    def addFile(self, file):
        """Add a single file for submission"""
        self.__moss.addFile(file)

    def addBaseCode(self, base_file: str):
        """Add basefile"""
        self.__moss.addBaseFile(base_file)

    def submit(self):
        """Submit files to the Moss Server and scrape the returned report"""
        url = self.__moss.send()

        self.home_url = url
        self.moss_results = self.__extract_info()

    def getHomePage(self):
        """Return Moss Results HomePage URL (``None`` before ``submit()``)"""
        return self.home_url

    def getResults(self) -> Results:
        """Return the results as a list of dictionaries (empty before ``submit()``)"""
        return self.moss_results
175 |
--------------------------------------------------------------------------------
/plagcheck/plagcheck_test.py:
--------------------------------------------------------------------------------
1 | """Tests for the MOSS interface package for CodeClassroom"""
2 | from plagcheck import analyze, plagcheck
3 | from plagcheck.plagcheck import insights, share_scores
4 |
5 |
def test_check():
    """End-to-end run: submit two Python test files and check every derived output"""
    checker = plagcheck.check("python", "1")
    for path in ("testfiles/test_python.py", "testfiles/test_python2.py"):
        checker.addFile(path)
    checker.submit()

    results = checker.getResults()
    moss_insights = insights(results)
    moss_share_scores = share_scores(results)

    expected_share_scores = {
        "testfiles/test_python.py": 1,
        "testfiles/test_python2.py": 1,
    }
    assert moss_share_scores == expected_share_scores

    expected_insights = {"DCtoC Paths": [], "DtoC Paths": [], "DtoDC Paths": []}
    assert moss_insights == expected_insights

    expected_results = [
        {
            "file1": "testfiles/test_python.py",
            "file2": "testfiles/test_python2.py",
            "lines_matched": [["1-3", "1-3"]],
            "no_of_lines_matched": 3,
            "percentage_file1": 90,
            "percentage_file2": 90,
        },
    ]
    assert results == expected_results
35 |
36 |
def test_perc_str_to_int():
    """Check percentage parsing for the lowest, a middle and the highest value"""
    expectations = (("(0%)", 0), ("(42%)", 42), ("(100%)", 100))
    for text, expected in expectations:
        assert plagcheck.perc_str_to_int(text) == expected
45 |
46 |
def test_Mgroups():
    """Relate node "3" to "1" and "2" and check the resulting tagged paths"""
    graph = analyze.Mgroups()
    graph.createNodes({"1", "2", "3"})

    relations = ((45, 88, "3", "1"), (46, 90, "3", "2"))
    for perc_a, perc_b, name_a, name_b in relations:
        graph.relate(perc_a, perc_b, name_a, name_b)

    graph.set_tags()

    assert graph.d2dc() == []
    assert graph.d2c() == [("3", "1"), ("3", "2")]
    assert graph.dc2c() == []
60 |
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | mosspy
2 | beautifulsoup4
3 | lxml
4 | python-dotenv
5 | pytest
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 |
# The README doubles as the package's long description. Read it as UTF-8
# explicitly so the build does not depend on the platform's default encoding.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setuptools.setup(
    name="plagcheck",
    version="0.4",
    license="MIT",
    author="Bhupesh Varshney",
    author_email="varshneybhupesh@gmail.com",
    description="Moss Results scraper with powerful insights & analysis",
    keywords="moss plagiarism analysis cheat mosspy",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://codeclassroom.github.io/PlagCheck/",
    project_urls={
        "Documentation": "https://plagcheck.readthedocs.io/en/latest/?badge=latest",
        "Source Code": "https://github.com/codeclassroom/PlagCheck",
        "Funding": "https://www.patreon.com/bePatron?u=18082750",
        "Say Thanks!": "https://github.com/codeclassroom/PlagCheck/issues/new?assignees=&labels=&template=---say-thank-you.md&title=",
        "Tracker": "https://github.com/codeclassroom/PlagCheck/issues",
    },
    packages=setuptools.find_packages(),
    install_requires=[
        "mosspy",
        "beautifulsoup4",
        "lxml",
    ],
    # Each classifier is listed once.
    classifiers=[
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "License :: OSI Approved :: MIT License",
        "Topic :: Software Development :: Build Tools",
        "Topic :: Education",
        "Topic :: Software Development",
        "Topic :: Software Development :: Libraries",
        "Topic :: Software Development :: Libraries :: Python Modules",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.6",
)
--------------------------------------------------------------------------------
/testfiles/test_java.java:
--------------------------------------------------------------------------------
1 | class FibonacciExample1{
2 | public static void main(String args[]){
3 | int n1=0,n2=1,n3,i,count=10;
4 | System.out.print(n1+" "+n2);//printing 0 and 1
5 |
6 | for(i=2;i