├── .gitignore
├── .gitattributes
├── GraphOfDocs
├── images
│ ├── settings.jpg
│ ├── GraphofDocs.jpg
│ └── feature_selection.jpg
├── evaluation_results
│ ├── image_20news_jira.jpg
│ ├── reuters
│ │ ├── REUTERS_feature_selection_2NN.jpg
│ │ ├── REUTERS_feature_selection_5NN.jpg
│ │ ├── REUTERS_feature_selection_LR.jpg
│ │ ├── REUTERS_feature_selection_NB.jpg
│ │ ├── REUTERS_feature_selection_1KNN.jpg
│ │ ├── REUTERS_feature_selection_LSVM.jpg
│ │ ├── REUTERS_feature_selection_1KNN_0_1.jpg
│ │ ├── REUTERS_feature_selection_2NN_0_1.jpg
│ │ ├── REUTERS_feature_selection_5NN_0_1.jpg
│ │ ├── REUTERS_feature_selection_LR_0_1.jpg
│ │ ├── REUTERS_feature_selection_LSVM_0_1.jpg
│ │ ├── REUTERS_feature_selection_NB_0_1.jpg
│ │ ├── REUTERS_feature_selection_NN100x50.jpg
│ │ ├── REUTERS_feature_selection_NN100x50_0_1.jpg
│ │ └── REUTERS_evaluation_results.csv
│ ├── lingspam
│ │ ├── LINGSPAM_feature_selection_2NN.jpg
│ │ ├── LINGSPAM_feature_selection_5NN.jpg
│ │ ├── LINGSPAM_feature_selection_LR.jpg
│ │ ├── LINGSPAM_feature_selection_NB.jpg
│ │ ├── LINGSPAM_feature_selection_1KNN.jpg
│ │ ├── LINGSPAM_feature_selection_LR_0_1.jpg
│ │ ├── LINGSPAM_feature_selection_LSVM.jpg
│ │ ├── LINGSPAM_feature_selection_NB_0_1.jpg
│ │ ├── LINGSPAM_feature_selection_1KNN_0_1.jpg
│ │ ├── LINGSPAM_feature_selection_2NN_0_1.jpg
│ │ ├── LINGSPAM_feature_selection_5NN_0_1.jpg
│ │ ├── LINGSPAM_feature_selection_LSVM_0_1.jpg
│ │ ├── LINGSPAM_feature_selection_NN100x50.jpg
│ │ ├── LINGSPAM_feature_selection_NN100x50_0_1.jpg
│ │ ├── LINGSPAM_evaluation_results.csv
│ │ └── lingspam_results.txt
│ ├── amazon_sentiment
│ │ ├── AMAZON_feature_selection_LR.jpg
│ │ ├── AMAZON_feature_selection_NB.jpg
│ │ ├── AMAZON_feature_selection_1KNN.jpg
│ │ ├── AMAZON_feature_selection_2NN.jpg
│ │ ├── AMAZON_feature_selection_5NN.jpg
│ │ ├── AMAZON_feature_selection_LSVM.jpg
│ │ ├── AMAZON_feature_selection_2NN_0_1.jpg
│ │ ├── AMAZON_feature_selection_5NN_0_1.jpg
│ │ ├── AMAZON_feature_selection_LR_0_1.jpg
│ │ ├── AMAZON_feature_selection_NB_0_1.jpg
│ │ ├── AMAZON_feature_selection_1KNN_0_1.jpg
│ │ ├── AMAZON_feature_selection_LSVM_0_1.jpg
│ │ ├── AMAZON_feature_selection_NN100x50.jpg
│ │ ├── AMAZON_feature_selection_NN100x50_0_1.jpg
│ │ └── AMAZON_evaluation_results.csv
│ ├── jira_issues
│ │ ├── JIRAISSUES_feature_selection_2NN.jpg
│ │ ├── JIRAISSUES_feature_selection_5NN.jpg
│ │ ├── JIRAISSUES_feature_selection_LR.jpg
│ │ ├── JIRAISSUES_feature_selection_NB.jpg
│ │ ├── JIRAISSUES_feature_selection_1KNN.jpg
│ │ ├── JIRAISSUES_feature_selection_LR_0_1.jpg
│ │ ├── JIRAISSUES_feature_selection_LSVM.jpg
│ │ ├── JIRAISSUES_feature_selection_NB_0_1.jpg
│ │ ├── JIRAISSUES_feature_selection_1KNN_0_1.jpg
│ │ ├── JIRAISSUES_feature_selection_2NN_0_1.jpg
│ │ ├── JIRAISSUES_feature_selection_5NN_0_1.jpg
│ │ ├── JIRAISSUES_feature_selection_LSVM_0_1.jpg
│ │ ├── JIRAISSUES_feature_selection_NN100x50.jpg
│ │ ├── JIRAISSUES_feature_selection_NN100x50_0_1.jpg
│ │ └── JIRAISSUES_evaluation_results.csv
│ ├── 20newsgroups
│ │ ├── 20NEWSGROUPS_feature_selection_2NN.jpg
│ │ ├── 20NEWSGROUPS_feature_selection_5NN.jpg
│ │ ├── 20NEWSGROUPS_feature_selection_LR.jpg
│ │ ├── 20NEWSGROUPS_feature_selection_NB.jpg
│ │ ├── 20NEWSGROUPS_feature_selection_1KNN.jpg
│ │ ├── 20NEWSGROUPS_feature_selection_LSVM.jpg
│ │ ├── 20NEWSGROUPS_feature_selection_1KNN_0_1.jpg
│ │ ├── 20NEWSGROUPS_feature_selection_2NN_0_1.jpg
│ │ ├── 20NEWSGROUPS_feature_selection_5NN_0_1.jpg
│ │ ├── 20NEWSGROUPS_feature_selection_LR_0_1.jpg
│ │ ├── 20NEWSGROUPS_feature_selection_LSVM_0_1.jpg
│ │ ├── 20NEWSGROUPS_feature_selection_NB_0_1.jpg
│ │ ├── 20NEWSGROUPS_feature_selection_NN100x50.jpg
│ │ ├── 20NEWSGROUPS_feature_selection_NN500x250.jpg
│ │ ├── 20NEWSGROUPS_feature_selection_NN100x50_0_1.jpg
│ │ ├── 20NEWSGROUPS_feature_selection_NN500x250_0_1.jpg
│ │ └── 20NEWSGROUPS_evaluation_results.csv
│ └── amazon_categories
│ │ ├── AMAZON_feature_selection_1KNN.jpg
│ │ ├── AMAZON_feature_selection_2NN.jpg
│ │ ├── AMAZON_feature_selection_5NN.jpg
│ │ ├── AMAZON_feature_selection_LR.jpg
│ │ ├── AMAZON_feature_selection_LSVM.jpg
│ │ ├── AMAZON_feature_selection_NB.jpg
│ │ ├── AMAZON_feature_selection_LR_0_1.jpg
│ │ ├── AMAZON_feature_selection_NB_0_1.jpg
│ │ ├── AMAZON_feature_selection_1KNN_0_1.jpg
│ │ ├── AMAZON_feature_selection_2NN_0_1.jpg
│ │ ├── AMAZON_feature_selection_5NN_0_1.jpg
│ │ ├── AMAZON_feature_selection_LSVM_0_1.jpg
│ │ ├── AMAZON_feature_selection_NN100x50.jpg
│ │ ├── AMAZON_feature_selection_NN100x50_0_1.jpg
│ │ └── AMAZON_evaluation_results.csv
├── __init__.py
├── web
│ └── custom
│ │ ├── custom.css
│ │ └── custom.js
├── neo4j_wrapper.py
├── config_experiments.py
├── parse_args.py
├── select.py
├── algos.py
├── visualize.html
├── parse_news.py
├── parse_issues.py
├── parse_reviews.py
├── utils.py
├── create.py
└── evaluation.py
├── requirements.txt
├── GraphOfDocs.py
├── README.md
├── experiments.py
└── LICENSE
/.gitignore:
--------------------------------------------------------------------------------
1 | env/
2 | __pycache__/
3 | Virtual Environment/
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | GraphOfDocs/web/vendor/* linguist-vendored
2 |
--------------------------------------------------------------------------------
/GraphOfDocs/images/settings.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/images/settings.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/images/GraphofDocs.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/images/GraphofDocs.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/images/feature_selection.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/images/feature_selection.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/image_20news_jira.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/image_20news_jira.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_2NN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_2NN.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_5NN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_5NN.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_LR.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_LR.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_NB.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_NB.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_2NN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_2NN.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_5NN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_5NN.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_LR.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_LR.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_NB.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_NB.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_1KNN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_1KNN.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_LSVM.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_LSVM.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_1KNN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_1KNN.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_LR_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_LR_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_LSVM.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_LSVM.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_NB_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_NB_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_1KNN_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_1KNN_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_2NN_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_2NN_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_5NN_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_5NN_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_LR_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_LR_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_LSVM_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_LSVM_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_NB_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_NB_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_NN100x50.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_NN100x50.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_LR.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_LR.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_NB.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_NB.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_2NN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_2NN.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_5NN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_5NN.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_LR.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_LR.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_NB.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_NB.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_1KNN_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_1KNN_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_2NN_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_2NN_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_5NN_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_5NN_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_LSVM_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_LSVM_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_NN100x50.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_NN100x50.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_2NN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_2NN.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_5NN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_5NN.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_LR.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_LR.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_NB.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_NB.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_1KNN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_1KNN.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_2NN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_2NN.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_5NN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_5NN.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_LR.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_LR.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_LSVM.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_LSVM.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_NB.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_NB.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_1KNN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_1KNN.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_2NN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_2NN.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_5NN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_5NN.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_LSVM.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_LSVM.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_1KNN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_1KNN.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_LR_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_LR_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_LSVM.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_LSVM.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_NB_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_NB_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_NN100x50_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/reuters/REUTERS_feature_selection_NN100x50_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_1KNN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_1KNN.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_LSVM.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_LSVM.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_LR_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_LR_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_NB_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_NB_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_2NN_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_2NN_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_5NN_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_5NN_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_LR_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_LR_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_NB_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_NB_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_1KNN_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_1KNN_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_2NN_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_2NN_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_5NN_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_5NN_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_LSVM_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_LSVM_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_NN100x50.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_NN100x50.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_NN100x50_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_feature_selection_NN100x50_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_1KNN_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_1KNN_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_2NN_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_2NN_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_5NN_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_5NN_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_LR_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_LR_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_LSVM_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_LSVM_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_NB_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_NB_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_NN100x50.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_NN100x50.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_1KNN_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_1KNN_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_2NN_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_2NN_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_5NN_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_5NN_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_LSVM_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_LSVM_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_NN100x50.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_NN100x50.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_1KNN_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_1KNN_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_LSVM_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_LSVM_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_NN100x50.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_NN100x50.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_NN500x250.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_NN500x250.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_NN100x50_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_feature_selection_NN100x50_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_NN100x50_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_feature_selection_NN100x50_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_NN100x50_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_NN100x50_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_NN500x250_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_feature_selection_NN500x250_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_NN100x50_0_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NC0DER/GraphOfDocs/HEAD/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_feature_selection_NN100x50_0_1.jpg
--------------------------------------------------------------------------------
/GraphOfDocs/__init__.py:
--------------------------------------------------------------------------------
# Names advertised as the package's public API.
# 'read_dataset' (singular) is the helper the entry script imports from
# GraphOfDocs.utils; the previous 'read_datasets' entry did not match it.
__all__ = [
    'Neo4jDatabase',
    'create_graph_of_words',
    'run_initial_algorithms',
    'create_similarity_graph',
    'create_clustering_tags',
    'generate_words',
    'read_dataset',
    'clear_screen',
    'parser',
]
12 |
--------------------------------------------------------------------------------
/GraphOfDocs/web/custom/custom.css:
--------------------------------------------------------------------------------
/* Page-wide layout: fill the viewport height and suppress horizontal scrolling. */
body, html {
    height: 100%;
    max-width: 100%;
    overflow-x: hidden;
    /* Fall back to the generic monospace family when Consolas is not installed. */
    font-family: Consolas, monospace;
}

/* Bordered, fixed-height box with the id "viz". */
#viz {
    width: 100%;
    height: 700px;
    margin-top: 20px;
    border: 1px solid #ced4da;
    border-radius: .25rem;
    margin-bottom: 10px;
}

/* Let form groups span the full width of their parent. */
.form-group {
    min-width: 100%;
}
20 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | cycler==0.10.0
2 | joblib==0.14.1
3 | kiwisolver==1.1.0
4 | matplotlib==3.2.0
5 | neo4j==1.7.6
6 | neobolt==1.7.16
7 | neotime==1.7.4
8 | nltk==3.4.5
9 | numpy==1.18.1
10 | pandas==1.0.1
11 | pip==21.1
12 | prettytable==0.7.2
13 | pyparsing==2.4.6
14 | python-dateutil==2.8.1
15 | pytz==2019.3
16 | scikit-learn==0.22.2.post1
17 | scipy==1.4.1
18 | seaborn==0.10.0
19 | setuptools==45.3.0
20 | singledispatch==3.4.0.3
21 | six==1.14.0
22 |
--------------------------------------------------------------------------------
/GraphOfDocs/neo4j_wrapper.py:
--------------------------------------------------------------------------------
1 | from neo4j import GraphDatabase, CypherError, ServiceUnavailable
2 |
class Neo4jDatabase(object):
    """
    Wrapper class to handle the database
    more efficiently, by abstracting repeating code.
    """

    def __init__(self, uri, user, password):
        """Open a driver for `uri`, authenticating as `user` / `password`."""
        self._driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        """Close the underlying driver."""
        self._driver.close()

    def execute(self, query, mode):
        """
        Run `query` inside a read ('r') or write ('w') transaction.

        Returns whatever the transaction function returns: the list
        produced by result.values(), or None when a CypherError was
        swallowed. Raises TypeError for any other `mode`.
        """
        # Reject a bad mode up front, so no session is opened just to fail.
        if mode not in ('r', 'w'):
            raise TypeError('Execution mode can either be (r)ead or (w)rite!')

        with self._driver.session() as session:
            if mode == 'r':  # Reading query.
                return session.read_transaction(self.__execute, query)
            # Writing query.
            return session.write_transaction(self.__execute, query)

    @staticmethod  # private method.
    def __execute(tx, query):
        """
        Transaction function: run `query` and return its rows
        (node, relationship values) as a list.
        """
        # Both the run and the fetch are guarded, so an erroneous query
        # is handled the same way wherever the driver reports it.
        try:
            return tx.run(query).values()
        except CypherError:
            # Deliberate best effort: an erroneous query yields None
            # instead of breaking the execution.
            return None
30 |
--------------------------------------------------------------------------------
/GraphOfDocs/config_experiments.py:
--------------------------------------------------------------------------------
1 | from sklearn.naive_bayes import MultinomialNB
2 | from sklearn.linear_model import LogisticRegression
3 | from sklearn.neural_network import MLPClassifier
4 | from sklearn.neighbors import KNeighborsClassifier
5 | from sklearn.svm import LinearSVC
6 |
MIN_NUMBER_OF_DOCUMENTS_PER_SELECTED_COMMUNITY = 2
DATASET_PATH = \
    r'C:\Users\USER\source\repos\GraphOfDocs\GraphOfDocs\datasets\amazon'

PLOTS_PREFIX = 'AMAZON'
# ASCII-only spelling of the output directory constant.
EXPERIMENTAL_RESULTS_OUTPUT_DIR = \
    r'C:\Users\USER\source\repos\GraphOfDocs\GraphOfDocs\experimental_results\amazon'
# Backward-compatible alias: the original identifier appears to be spelled
# with a Greek capital tau (U+03A4) in place of the Latin 'T'. It is kept so
# that modules already importing that spelling keep working.
EXPERIMENTAL_RESULTS_OUΤPUT_DIR = EXPERIMENTAL_RESULTS_OUTPUT_DIR

# Feature selection
VARIANCE_THRESHOLD = [0.0005, 0.001, 0.0015, 0.002, 0.003, 0.004, 0.005, 0.01]
SELECT_KBEST_K = [350, 500, 1000, 2000, 3000, 4000, 5000, 6000, 7000]

# Graph of docs feature selection.
# Create a vocabulary with the TOP N words of each community of docs
TOP_N_SELECTED_COMMUNITY_TERMS = [5, 10, 15, 20, 25, 50, 100, 250, 500]

#VARIANCE_THRESHOLD = [0.0005]
#SELECT_KBEST_K = [1000]
#TOP_N_SELECTED_COMMUNITY_TERMS = [5]
26 |
# Each entry is a (label, estimator instance) pair.
classifiers = [
    ('NB', MultinomialNB()),
    (
        'LR',
        LogisticRegression(
            random_state=0,
            solver='lbfgs',
            multi_class='multinomial',
        ),
    ),
    ('5NN', KNeighborsClassifier(n_neighbors=5, weights='distance')),
    ('2NN', KNeighborsClassifier(n_neighbors=2, weights='distance')),
    ('1KNN', KNeighborsClassifier(n_neighbors=1, weights='distance')),
    ('LSVM', LinearSVC()),
    (
        'NN100x50',
        MLPClassifier(
            solver='adam',
            hidden_layer_sizes=(100, 50),
            random_state=42,
        ),
    ),
    #('NN500x250', MLPClassifier(solver='adam', hidden_layer_sizes=(500, 250), random_state=42)),
]
37 |
def extract_file_class(filename):
    """
    Return the second dot-separated field of the part of `filename`
    that precedes its first underscore, e.g. 'a.b_c' -> 'b'.

    Raises IndexError when that part contains no dot.
    """
    before_underscore, _, _ = filename.partition('_')
    dotted_fields = before_underscore.split('.')
    return dotted_fields[1]
40 |
--------------------------------------------------------------------------------
/GraphOfDocs/parse_args.py:
--------------------------------------------------------------------------------
"""This script contains code for the command line argument parser."""
# The docstring has to be the first statement of the module;
# placed after the import it was a no-op expression and never became __doc__.
import argparse

parser = argparse.ArgumentParser(description = 'Create, reinitialize, analyze the graphofdocs model')
parser.add_argument('-c', '--create', action = 'store_true',
    help = 'set this flag to create and initialize the graphofdocs model')

parser.add_argument('-r', '--reinitialize', action = 'store_true',
    help = 'set this flag to reinitialize the graphofdocs model, '
           'by re-running centrality, community detection and similarity algorithms')

# nargs = 1 makes argparse store a one-element list, so the value is read with [0].
parser.add_argument('-dir', '--dirpath', nargs = 1, type = str,
    help = 'if create is set, '
           'then specify a directory path, '
           'containing plaintext files (documents).')

# The default is a one-element list to match the shape produced by nargs = 1.
parser.add_argument('-ws', '--window-size', nargs = 1, type = int,
    default = [4], choices = [2, 3, 4, 5, 6],
    help = 'if create is set, then set the window size')

parser.add_argument('-e', '--extend-window', action = 'store_true',
    help = 'if create is set, then set this flag to '
           'enable the sliding text window to extend '
           'over words of different sentences. '
           '(Default behavior: Disabled)')

parser.add_argument('-is', '--insert-stopwords', action = 'store_true',
    help = 'if create is set, then set this flag to '
           'enable the insertion of stopwords from the text to the model. '
           '(Default behavior: Disabled)')

parser.add_argument('-l', '--lemmatize', action = 'store_true',
    help = 'if create is set, then set this flag to '
           'enable the lemmatization of terms of the text. '
           '(Default behavior: Disabled)')

parser.add_argument('-s', '--stem', action = 'store_true',
    help = 'if create is set, then set this flag to '
           'enable the stemming of terms of the text. '
           '(Default behavior: Disabled)')
40 |
--------------------------------------------------------------------------------
/GraphOfDocs/select.py:
--------------------------------------------------------------------------------
1 | """
2 | This script contains functions that
3 | select data from the Neo4j database.
4 | """
5 |
def get_communities_filenames(database):
    """
    Fetch, for every document community, the list of its filenames
    together with the file count, largest communities first.
    Returns whatever database.execute() returns for the read query.
    """
    cypher = ''.join((
        'MATCH (d:Document) RETURN d.community, ',
        'collect(d.filename) AS files, ',
        'count(d.filename) AS file_count ',
        'ORDER BY file_count DESC',
    ))
    return database.execute(cypher, 'r')
17 |
def get_communities_tags(database, top_terms = None):
    """
    Build a mapping from community to the terms that describe it.

    The query collects, per community, the words included by more than
    one of its documents, ordered by that document count and then by
    pagerank score, both descending. When top_terms is given, only that
    many leading terms are kept per community; otherwise all are kept.
    """
    cypher = ' '.join((
        'MATCH p=((d:Document)-[:includes]->(w:Word))',
        'WITH d.community as community, w, count(p) as degree',
        'WHERE degree > 1',
        'WITH community as com, w.key as word, w.pagerank as pagerank, degree as deg',
        'ORDER BY com, deg DESC, pagerank DESC',
        'RETURN com, collect([word, pagerank, deg])',
    ))
    rows = database.execute(cypher, 'r')

    # Each row is (community, [[word, pagerank, degree], ...]).
    # Slicing with None keeps the whole list, so one expression covers both cases.
    return {
        community: [entry[0] for entry in ranked[:top_terms]]
        for community, ranked in rows
    }
43 |
def get_word_digrams_by_filename(database, filename):
    """
    Retrieve the connected word pairs (source, target, weight) that are
    both included by the document named `filename`, collected into one
    list ordered by weight, descending.
    Returns whatever database.execute() returns for the read query.
    """
    # The filename is embedded in a double-quoted Cypher string literal,
    # so backslashes and double quotes must be escaped; otherwise such a
    # filename breaks (or alters) the query.
    escaped = str(filename).replace('\\', '\\\\').replace('"', '\\"')
    query = (f'MATCH (d:Document {{filename: "{escaped}"}})'
        '-[:includes]->(w1:Word)-[r:connects]->(w2:Word)'
        '<-[:includes]-(d) WHERE id(w1) < id(w2) '
        'WITH w1.key AS source, w2.key AS target, r.weight AS weight '
        'ORDER BY weight DESC RETURN collect([source, target, weight]) AS digrams')
    results = database.execute(query, 'r')
    return results
52 |
--------------------------------------------------------------------------------
/GraphOfDocs/algos.py:
--------------------------------------------------------------------------------
1 | """
2 | This script contains wrapper functions that
3 | call algorithms in the database,
4 | such as Pagerank, Louvain Community Detection,
5 | and Jaccard Similarity Measure.
6 | Their implementations are located
7 | in the Neo4j Algorithms library.
8 | """
9 |
def pagerank(database, node, edge, iterations, property, weight = ''):
    """
    Call algo.pageRank over nodes labelled `node` joined by `edge`
    relationships, writing the score to the node property `property`.

    `iterations` must be an int; every other argument must be a str.
    A non-empty `weight` names the relationship property used as weight.
    Raises TypeError when an argument has the wrong type.
    """
    type_correct = all([isinstance(node, str),
                        isinstance(edge, str),
                        isinstance(iterations, int),
                        isinstance(property, str),
                        isinstance(weight, str)])

    if not type_correct:
        raise TypeError('All arguments should be strings, except iterations which should be int!')

    # The property name has to be a quoted Cypher string (as louvain does);
    # unquoted it would be parsed as a variable reference.
    weight_clause = f', weightProperty: "{weight}"' if weight else ''

    query = (f'CALL algo.pageRank("{node}", "{edge}", '
             f'{{iterations: {iterations}, dampingFactor: 0.85, '
             f'write: true, writeProperty: "{property}"{weight_clause}}}) '
             'YIELD nodes, iterations, loadMillis, computeMillis, writeMillis, dampingFactor, write, writeProperty')
    database.execute(query, 'w')
    return
28 |
def louvain(database, node, edge, property, weight = ''):
    """
    Call algo.louvain over nodes labelled `node` joined by `edge`
    relationships (both directions), writing the result to the node
    property `property`. A non-empty `weight` names the relationship
    property used as weight.

    Every argument except `database` must be a str, else TypeError.
    """
    for argument in (node, edge, property, weight):
        if not isinstance(argument, str):
            raise TypeError('All arguments should be strings!')

    # Assemble the configuration map entry by entry.
    options = ['direction: "BOTH"', f'writeProperty: "{property}"']
    if weight:  # Only mention the weight when one was supplied.
        options.append(f'weightProperty: "{weight}"')

    statement = ''.join((
        f'CALL algo.louvain("{node}", "{edge}", ',
        '{' + ', '.join(options) + '}) ',
        'YIELD nodes, communityCount, iterations, loadMillis, computeMillis, writeMillis',
    ))
    database.execute(statement, 'w')
46 |
def jaccard(database, source, edge, target, cutoff, relationship, property):
    """
    Call algo.similarity.jaccard on `source` nodes, each described by
    the ids of the `target` nodes it reaches through `edge`. With
    topK 1 and the given similarity `cutoff`, the result is written as
    `relationship` relationships carrying the score in `property`.

    `cutoff` must be a float; every other argument except `database`
    must be a str. Raises TypeError otherwise.
    """
    text_arguments = (source, edge, target, relationship, property)
    all_text = all(isinstance(value, str) for value in text_arguments)
    if not (all_text and isinstance(cutoff, float)):
        raise TypeError('All arguments should be strings, except cutoff which should be a float!')

    # Configuration map handed to the similarity procedure.
    settings = (
        '{topK: 1, '
        f'similarityCutoff: {cutoff}, '
        'write: true, '
        f'writeRelationshipType: "{relationship}", '
        f'writeProperty: "{property}"'
        '}'
    )
    statement = (
        f'MATCH (d:{source})-[:{edge}]->(w:{target}) '
        'WITH {item:id(d), categories: collect(id(w))} as data '
        'WITH collect(data) as Data '
        f'CALL algo.similarity.jaccard(Data, {settings}) '
        'YIELD nodes, similarityPairs, write, writeRelationshipType, writeProperty, '
        'min, max, mean, stdDev, p25, p50, p75, p90, p95, p99, p999, p100 '
        'RETURN nodes, similarityPairs, write, writeRelationshipType, writeProperty, '
        'min, max, mean, p95 '
    )
    database.execute(statement, 'w')
--------------------------------------------------------------------------------
/GraphOfDocs/visualize.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Dataviz
7 |
8 |
9 |
10 |
11 |
14 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
--------------------------------------------------------------------------------
/GraphOfDocs/web/custom/custom.js:
--------------------------------------------------------------------------------
//Viz is a global object and it is created once.
var viz;

// Once the DOM is ready, draw the default graph:
// connected words whose pagerank lies strictly between 90 and 200.
$(document).ready(function () {
    var query = "MATCH (n:Word)-[r:connects]-(k) "
        + "WHERE n.pagerank > 90 "
        + "AND k.pagerank > 90 "
        + "AND n.pagerank < 200 "
        + "AND k.pagerank < 200 "
        + "RETURN n,r,k LIMIT 1000";
    draw(query);
});

// Re-render the graph for the pagerank range and minimum weight typed in the form.
$("#query").click(function () {
    var start = $.trim($("#field1").val());
    var end = $.trim($("#field2").val());
    var score = $.trim($("#field3").val());
    if (start === "" || end === "" || score === "") {
        alert("Please specify the ranges and/or score!");
        return;
    }
    // The values are concatenated into the Cypher text, so anything that
    // is not a finite number is rejected instead of being sent as query code.
    if (!isFinite(start) || !isFinite(end) || !isFinite(score)) {
        alert("The ranges and score must be numbers!");
        return;
    }
    start = Number(start);
    end = Number(end);
    score = Number(score);
    // Build the query based on the above values.
    var query = "MATCH (n:Word)-[r:connects]-(k) "
        + "WHERE n.pagerank > " + start + " "
        + "AND k.pagerank > " + start + " "
        + "AND n.pagerank < " + end + " "
        + "AND k.pagerank < " + end + " "
        + "AND r.weight >= " + score + " "
        + "RETURN n,r,k LIMIT 1000";
    viz.renderWithCypher(query);
});

$("#stabilize").click(function () {
    viz.stabilize();
});

// Run the free-form Cypher typed in the textarea when Enter is pressed.
$("#textarea").keyup(function (e) {
    var code = e.keyCode ? e.keyCode : e.which;
    if (code === 13) { // Enter key pressed.
        var query = $("#textarea").val().replace(/\r?\n|\r/g, "");
        // Set value back to retain the query
        // without any newline characters.
        $("#textarea").val(query);
        if (query === "") {
            alert("Please supply a query!");
            return;
        }
        viz.renderWithCypher(query);
        return;
    }
});
52 |
// Create the global viz object from a fixed configuration and render
// the graph returned by the given Cypher query.
function draw(query) {
    // How each node label is displayed.
    var labelStyles = {
        "Word": { caption: "key", size: "pagerank", community: "community" },
        "Document": { caption: "filename", size: "none", community: "community" }
    };

    // How each relationship type is displayed.
    var relationshipStyles = {
        "connects": { caption: "weight", thickness: "weight" },
        "includes": { caption: "none", thickness: "none" },
        "is_similar": { caption: "score", thickness: "score" },
        "has_tag": { caption: "none", thickness: "none" }
    };

    // Connection settings plus the styles above and the first query to run.
    var settings = {
        container_id: "viz",
        server_url: "bolt://localhost:7687",
        server_user: "neo4j",
        server_password: "123",
        labels: labelStyles,
        relationships: relationshipStyles,
        initial_cypher: query
    };

    viz = new NeoVis.default(settings);
    viz.render();
}
96 |
--------------------------------------------------------------------------------
/GraphOfDocs/parse_news.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import string
3 | import platform
4 | from pathlib import Path
5 | from utils import clear_screen
6 | from parse_reviews import find_all, get_tag_value
7 |
8 | """
9 | Function that reads an sgml-like-syntax file containing multiple reuters news sgml tags,
10 | which are processed to have their plaintext extracted, without any metadata.
11 | Finally, each news story is being written into its own file, in the output directory.
12 | """
13 | def convert_sgml_news_to_files(filepath):
14 | current_system = platform.system()
15 | with open(filepath, 'rt', encoding = 'utf-8-sig', errors = 'ignore') as file:
16 | # Read file contents and remove newline characters.
17 | text = file.read().replace('\n', ' ').replace('\r', '')
18 | # Remove non-printable characters from string.
19 | text = ''.join(filter(lambda x: x in string.printable, text))
20 |
21 | count = 1
22 | total_count = text.count('
25 | for start, end in zip(find_all(text, '')):
26 | # Print the number of the currently processed news story.
27 | print(f'Processing {count} out of {total_count} news stories...')
28 | # Adjust the index to point after the starting tag.
29 | start = start + offset
30 |
31 | # Retrieve all content from these sgml tags.
32 | topics = get_tag_value(text, 'TOPICS', start, end)
33 | title = get_tag_value(text, 'TITLE', start, end)
34 | news_text = get_tag_value(text, 'BODY', start, end)
35 | # If the news story lacks these fields proceed to the next one.
36 | if topics is None or title is None or news_text is None:
37 | count = count + 1
38 | clear_screen(current_system)
39 | continue
40 |
41 | # Remove tags that encapsulate topic values, and separate them with periods.
42 | topics = topics.replace('', '').replace('', '.')
43 | # Remove last period separator from string.
44 | topics = topics[:-1]
45 | # Join all plaintext information on a single string, then write it in a file.
46 | document_text = '\n'.join((title, news_text))
47 |
48 | # Each news story will have a filename consisting of _
49 | filename = '_'.join((topics, str(count)))
50 | # If the filename already exists, the file will be overwritten.
51 | # All files will be created in the default folder: output/
52 | # If the directory already exists, there will be raised no exception.
53 | Path('output').mkdir(exist_ok = True)
54 | with open(''.join(('output/', filename)), 'w') as document:
55 | document.write(document_text)
56 | count = count + 1
57 | # Clear the screen to output the update the progress counter.
58 | clear_screen(current_system)
59 | return
60 |
if __name__ == '__main__':
    # The input file path is the first argument after the script name.
    if len(sys.argv) <= 1:
        print('Please input a file path, after parse_news.py.')
    else:
        convert_sgml_news_to_files(sys.argv[1])
67 |
--------------------------------------------------------------------------------
/GraphOfDocs/parse_issues.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import json
3 | import string
4 | import platform
5 | from pathlib import Path
6 | from utils import clear_screen
7 |
8 | """
9 | Function that reads a JSON file containing multiple jira issues,
10 | which are processed to have their plaintext extracted, without any metadata.
11 | Finally, each issue is being written into its own file, in the output directory.
12 | """
def convert_json_issues_to_files(filepath):
    """
    Read the JSON file at `filepath`, whose top-level 'issues' key holds
    a list of issue dictionaries, and write the summary and description
    of every issue assigned to one of the chosen assignees into its own
    file, named <assignee>_<issue key>, under the output/ directory.

    Issues that are unassigned, belong to another assignee or have no
    text are not written. Progress is printed to the screen.
    """
    current_system = platform.system()
    # List that contains the chosen assignees
    assignees = [
        'aantonenko', 'andrus', 'atkach', 'wesmckinn', 'julianhyde',
        'andy.seaborne', 'purplecabbage', 'ababiichuk', 'jbellis',
        'batik-dev@xmlgraphics.apache.org', 'djohnson', 'ancosen',
        'elserj', 'bowserj', 'onechiporenko']
    # Characters removed from the assignee key before it is used in a filename.
    removed_chars = {ord(c): '' for c in '\'\"/*:?<>|_'}

    with open(filepath, 'rt', encoding = 'utf-8-sig', errors = 'ignore') as dataset:
        # Load the json object in memory as a list of dictionaries.
        issues = json.load(dataset)['issues']

    total_count = len(issues)
    written = 0  # Files actually created.
    skip = 0     # Chosen issues dropped because they had no text.
    # Iterate all issues; enumerate keeps the progress counter
    # advancing on every path, including the skipped ones.
    for count, issue in enumerate(issues, start = 1):
        # Print the number of the currently processed issue.
        print(f'Processing {count} out of {total_count} issues...')
        # Retrieve all important fields from the dictionary.
        issue_key = issue['key']
        fields = issue['fields']
        issue_summary = ('' if fields['summary'] is None else fields['summary'])
        issue_description = ('' if fields['description'] is None else fields['description'])

        # An issue whose assignee is null cannot belong to a chosen
        # assignee; treat it like any other non-selected issue.
        assignee = fields['assignee']
        assignee_key = (None if assignee is None
                        else assignee['key'].translate(removed_chars))

        # Choose the top 15 assignees of the dataset.
        if assignee_key not in assignees:
            clear_screen(current_system)
            continue

        # Each issue will become a file.
        # The filename is the assignee key and the issue key, joined by '_'.
        file_name = '_'.join((assignee_key, issue_key))
        document_text = '\n'.join((issue_summary, issue_description))
        document_text = ''.join(filter(lambda x: x in string.printable, document_text))

        # Skip issues with empty text. The joined text always contains
        # at least the newline separator, so test it after stripping.
        if document_text.strip() == '':
            skip = skip + 1
            clear_screen(current_system)
            continue

        # If the filename already exists, the file will be overwritten.
        # All files will be created in the default folder: output/
        # If the directory already exists, there will be raised no exception.
        Path('output').mkdir(exist_ok = True)
        with open(''.join(('output/', file_name)), 'w') as document:
            document.write(document_text)
        written = written + 1
        # Clear the screen to output the updated progress counter.
        clear_screen(current_system)
    # Print the number of files written, and the skipped items if any.
    print(f'Loaded {written} issues, skipped {skip} empty items.')
    return
69 |
if __name__ == '__main__':
    # The input file path is the first argument after the script name.
    if len(sys.argv) <= 1:
        print('Please input a file path, after parse_issues.py.')
    else:
        convert_json_issues_to_files(sys.argv[1])
76 |
--------------------------------------------------------------------------------
/GraphOfDocs.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import platform
3 | from neo4j import ServiceUnavailable
4 | from GraphOfDocs.neo4j_wrapper import Neo4jDatabase
5 | from GraphOfDocs.utils import generate_words, read_dataset, clear_screen
6 | from GraphOfDocs.parse_args import parser
7 | from GraphOfDocs.create import *
8 |
def graphofdocs(create, initialize, dirpath, window_size,
                extend_window, remove_stopwords, lemmatize, stem):
    """
    Build and/or initialize the model in the local Neo4j database.

    When `create` is true, the database is emptied, a uniqueness
    constraint on Word.key is created, and a graph of words is created
    for every file read from `dirpath`; filenames reported back by
    create_graph_of_words are written to skipped.log.
    When `initialize` is true, the initialization functions are run.

    `window_size`, `extend_window`, `remove_stopwords`, `lemmatize` and
    `stem` are forwarded unchanged to create_graph_of_words and
    generate_words. Exits the process with status 1 when the database
    is unavailable.
    """
    # List that retains the skipped filenames.
    skipped = []
    current_system = platform.system()
    # Open the database.
    try:
        database = Neo4jDatabase('bolt://localhost:7687', 'neo4j', '123')
    # Neo4j server is unavailable.
    # This client app cannot open a connection.
    except ServiceUnavailable:
        print('\t* Neo4j database is unavailable.')
        print('\t* Please check the database connection before running this app.')
        input('\t* Press any key to exit the app...')
        sys.exit(1)

    # Close the driver even when processing fails midway,
    # so the connection is not leaked.
    try:
        if create:
            # Delete nodes from previous iterations.
            database.execute('MATCH (n) DETACH DELETE n', 'w')

            # Create uniqueness constraint on key to avoid duplicate word nodes.
            database.execute('CREATE CONSTRAINT ON (word:Word) ASSERT word.key IS UNIQUE', 'w')

            # Read text from files, which becomes a string in a list called dataset.
            dataset = read_dataset(dirpath)
            total_count = len(dataset)
            # Iterate all file records of the dataset.
            for count, (filename, file) in enumerate(dataset, start = 1):
                # Print the number of the currently processed file.
                print(f'Processing {count} out of {total_count} files...' )
                # Generate the terms from the text of each file.
                words = generate_words(file, extend_window, remove_stopwords, lemmatize, stem)
                # Create the graph of words in the database.
                value = create_graph_of_words(words, database, filename, window_size)
                if value is not None:
                    skipped.append(value)
                # Clear the screen to output the updated progress counter.
                clear_screen(current_system)
            # Count all skipped files and write their filenames in skipped.log
            skip_count = len(skipped)
            print(f'Created {total_count - skip_count}, skipped {skip_count} files.')
            print('Check skipped.log for info.')
            with open('skipped.log', 'w') as log:
                for item in skipped:
                    log.write(item + '\n')

        if initialize:
            # Run initialization functions.
            run_initial_algorithms(database)
            create_similarity_graph(database)
            create_clustering_tags(database)
    finally:
        database.close()
    return
67 |
if __name__ == '__main__':
    # With nothing but the script name on the command line,
    # show the usage text and exit.
    if len(sys.argv) == 1:
        parser.print_help()
        parser.exit()

    # Parse all arguments from terminal.
    args = parser.parse_args()

    # Creating the model requires a directory path.
    if args.create and args.dirpath is None:
        parser.error('Please set the dirpath flag and specify a valid filepath!')
    elif args.create or args.reinitialize:
        print(args)
        # Options shared by both modes; the window size is stored as a one-element list.
        shared_options = (args.window_size[0], args.extend_window,
                          args.insert_stopwords, args.lemmatize, args.stem)
        if args.create:
            # Create and initialize; the directory path is the first
            # (0th index) value stored for the dirpath flag.
            graphofdocs(True, True, args.dirpath[0], *shared_options)
        else:
            # Reinitialize only: create is False and no directory path is needed.
            graphofdocs(False, True, None, *shared_options)
96 |
--------------------------------------------------------------------------------
/GraphOfDocs/parse_reviews.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import string
4 | import platform
5 | from pathlib import Path
6 | from utils import clear_screen
7 |
def find_all(string, sub):
    """
    Generator that yields the start index of every non-overlapping
    occurrence of `sub` inside `string`, scanning from left to right.

    An empty `sub` matches once at every position from 0 to len(string)
    inclusive (the same positions str.count('') counts), after which
    the generator finishes.
    """
    # Always move forward by at least one character. Without this, an empty
    # `sub` would leave `start` unchanged and the generator would never end.
    step = max(len(sub), 1)
    start = 0
    while True:
        start = string.find(sub, start)
        if start == -1:
            return
        yield start
        start += step
18 |
def get_tag_value(string, type, start, end):
    """
    Extract the text enclosed by the first <type>...</type> pair
    that lies inside string[start:end].

    Returns None when the opening tag or the closing tag is missing from
    that range, or when the two tags enclose nothing, e.g. <title></title>.
    """
    # Construct tags from tag type.
    xml_start_tag = ''.join(('<', type, '>'))
    xml_end_tag = ''.join(('</', type, '>'))

    # Locate the opening tag, and bail out BEFORE adjusting the index,
    # otherwise the -1 "not found" marker would be lost by the addition.
    open_idx = string.find(xml_start_tag, start, end)
    if open_idx == -1:
        return None

    # The value begins right after the opening tag.
    value_start = open_idx + len(xml_start_tag)
    # Look for the closing tag only after the opening one.
    value_end = string.find(xml_end_tag, value_start, end)

    # Closing tag not found, or nothing between the two tags.
    if value_end == -1 or value_end == value_start:
        return None

    # Return the value from tags by slicing the string.
    return string[value_start:value_end]
47 |
def convert_xml_reviews_to_files(filepath):
    """
    Read an xml-like file that contains multiple <review>...</review> elements
    and write the plain text of each review (product name, title and review text,
    one per line, without any other metadata) into its own file under output/.

    Output files are named <input filename without .review>_<review number>.
    Existing files with the same name are overwritten.
    A field whose tag is missing or empty is written as an empty line.
    """
    review_open = '<review>'
    review_close = '</review>'
    current_system = platform.system()

    with open(filepath, 'rt', encoding = 'utf-8-sig', errors = 'ignore') as file:
        # Read file contents and remove newline characters.
        text = file.read().replace('\n', ' ').replace('\r', '')
    # Remove non-printable characters from string.
    printable_chars = set(string.printable)
    text = ''.join(char for char in text if char in printable_chars)

    # Find the filename, and remove the .review extension.
    filename = os.path.basename(filepath)
    if filename.endswith('.review'):
        filename = filename[:-len('.review')]

    # All files will be created in the default folder: output/
    # If the directory already exists, no exception is raised.
    output_dir = Path('output')
    output_dir.mkdir(exist_ok = True)

    total_count = text.count(review_open)
    offset = len(review_open)
    # Pair the n-th opening tag with the n-th closing tag.
    review_spans = zip(find_all(text, review_open), find_all(text, review_close))
    for count, (start, end) in enumerate(review_spans, start = 1):
        # Print the number of the currently processed review.
        print(f'Processing {count} out of {total_count} reviews...')
        # Adjust the index to point after the starting tag.
        start = start + offset

        # Retrieve all content from these xml tags.
        fields = [get_tag_value(text, tag, start, end)
                  for tag in ('product_name', 'title', 'review_text')]

        # get_tag_value returns None for missing/empty tags, which str.join rejects,
        # so those fields are replaced by empty strings.
        document_text = '\n'.join(field if field is not None else '' for field in fields)

        # If the filename already exists, the file will be overwritten.
        new_filename = '_'.join((filename, str(count)))
        with open(output_dir / new_filename, 'w') as document:
            document.write(document_text)
        # Clear the screen so that the next progress line replaces this one.
        clear_screen(current_system)
    return
94 |
if __name__ == '__main__':
    # sys.argv[0] is the script itself, so the file path
    # is expected to be the 2nd argument.
    if len(sys.argv) <= 1:
        print('Please input a file path, after parse_reviews.py.')
    else:
        convert_xml_reviews_to_files(sys.argv[1])
101 |
--------------------------------------------------------------------------------
/GraphOfDocs/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | This script contains utility functions
3 | e.g to read files, preprocess text, etc.
4 | """
5 | from os import system
6 | from os import listdir
7 | from os.path import isfile, join
8 | from string import punctuation, printable
9 | from nltk import pos_tag, sent_tokenize
10 | from nltk.corpus import wordnet, stopwords
11 | from nltk.stem.wordnet import WordNetLemmatizer
12 | from nltk.stem import PorterStemmer
13 | from nltk.tokenize import word_tokenize
14 |
15 | lemmatizer = WordNetLemmatizer() # Initialize lemmatizer once.
16 | stemmer = PorterStemmer() # Initialize Porter's stemmer once.
17 |
18 | stop_words = set(stopwords.words('english')).union([ # Augment the stopwords set.
19 | 'don','didn', 'doesn', 'aren', 'ain', 'hadn',
20 | 'hasn', 'mightn', 'mustn', 'couldn', 'shouldn',
21 | 'dont', 'didnt', 'doesnt', 'arent', 'aint',
22 | 'hadnt', 'hasnt', 'may', 'mightve', 'couldnt',
23 | 'shouldnt', 'shouldnot', 'shouldntve', 'mustnt',
24 | 'would', 'woulda', 'wouldany', 'wouldnot', 'woudnt',
25 | 'wouldve', 'must', 'could', 'can', 'have', 'has',
26 | 'do', 'does', 'did', 'are', 'is', 'ive', 'cant', 'thats',
27 | 'isnt', 'youre', 'wont', 'from', 'subject', 'hes', 'etc',
28 | 'edu', 'com', 'org', 've', 'll', 'd', 're', 't', 's'])
29 |
def get_wordnet_tag(tag):
    """
    Translate a part-of-speech tag into the matching wordnet
    part-of-speech constant, based on the first letter of the tag:
    J -> adjective, V -> verb, N -> noun, R -> adverb.
    Any other tag falls back to noun, the default lemmatizer parameter.
    """
    wordnet_pos_by_initial = {
        'J': wordnet.ADJ,
        'V': wordnet.VERB,
        'N': wordnet.NOUN,
        'R': wordnet.ADV,
    }
    # tag[:1] is '' for an empty tag, which also ends up in the fallback.
    return wordnet_pos_by_initial.get(tag[:1], wordnet.NOUN)
45 |
def generate_words(text, extend_window = False, insert_stopwords = False, lemmatize = False, stem = False):
    """
    Function that turns a text into a list of lowercase tokens,
    kept in their order of appearance (repeated tokens are NOT removed).

    extend_window:    when False, the special token 'e5c' is inserted wherever
                      '. ', '! ' or '? ' ends a sentence, so that callers can
                      avoid connecting terms of different sentences.
    insert_stopwords: when False, stopwords, numeric tokens and tokens
                      of 2 characters or less are dropped.
    lemmatize:        when True, tokens are lemmatized using their part-of-speech tag.
    stem:             when True, tokens are stemmed (after lemmatization, if both are set).
    """
    # Collapse every run of whitespace characters into a single space.
    text = ' '.join(text.split())
    # Drop quote characters, so that the parts they connect become one token (e.g. don't -> dont).
    text = text.translate({ord(quote): '' for quote in '\'\"'})
    # Mark the end of each sentence with a special string, before the punctuation is erased.
    # Otherwise the last terms of a sentence could not be told apart from the first ones of the next.
    if not extend_window:
        for sentence_end in ('. ', '! ', '? '):
            text = text.replace(sentence_end, ' e5c ')
    # Translate punctuation to space and lowercase the string.
    text = text.translate({ord(mark): ' ' for mark in punctuation}).lower()

    tokens = word_tokenize(text)
    if not insert_stopwords:
        # Clean the data from stopwords, numbers and leftover syllables/letters.
        tokens = [token for token in tokens
                  if token not in stop_words and not token.isnumeric() and len(token) > 2]
    if lemmatize:
        # Replace every token with its lemma, guided by its part-of-speech tag.
        tokens = [lemmatizer.lemmatize(token, get_wordnet_tag(tag))
                  for token, tag in pos_tag(tokens)]
    if stem:
        # Replace every token with its stemmed version.
        tokens = [stemmer.stem(token) for token in tokens]
    return tokens
80 |
def read_dataset(dirpath):
    """
    Function that reads every regular file located directly inside dirpath
    (subdirectories are ignored) in text mode, and returns a list of
    (filename, text) tuples, sorted by filename.

    Newline characters ('\\n' and '\\r\\n' endings) are replaced by a single space
    and non-printable characters are removed from the text.
    Bytes that cannot be decoded as UTF-8 are ignored.
    """
    # Set membership is checked once per character of every file.
    allowed_chars = frozenset(printable)
    data = []
    # listdir returns names in arbitrary order; sorting them makes the
    # result (and everything derived from its order) reproducible.
    for name in sorted(listdir(dirpath)):
        path = join(dirpath, name)
        if not isfile(path):
            continue
        with open(path, 'rt', encoding = 'utf-8-sig', errors = 'ignore') as fd:
            text = fd.read().replace('\n', ' ').replace('\r', '')
        text = ''.join(char for char in text if char in allowed_chars)
        data.append((name, text))
    return data
98 |
def clear_screen(current_system):
    """
    Function that clears the terminal by running the shell command
    'cls' when current_system is 'Windows', or 'clear' otherwise (Linux/OS X).
    """
    command = 'cls' if current_system == 'Windows' else 'clear'
    system(command)
    return
105 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Graph-of-docs Text Representation
2 |
3 | This repository hosts code for the papers:
4 | * [On a novel representation of multiple textual documents in a single graph (KES-IDT 2020)](https://link.springer.com/chapter/10.1007%2F978-981-15-5925-9_9) - [Download](https://github.com/NC0DER/GraphOfDocs/releases/tag/KES-IDT-2020)
5 | * [An innovative graph-based approach to advance feature selection from multiple textual documents (AIAI 2020)](https://link.springer.com/chapter/10.1007%2F978-3-030-49161-1_9) - [Download](https://github.com/NC0DER/GraphOfDocs/archive/master.zip)
6 |
7 | 
8 |
9 | ## Datasets
10 | Available in [this link](https://github.com/imis-lab/aiai-2020-datasets)
11 |
12 | ## Test Results
13 | Edit `GraphOfDocs/config_experiments.py` to set up the experiments, then run `experiments.py`.
14 |
15 | ## Installation
16 | **Prerequisites:**
17 | * `Windows 10` 64-bit / Debian based `Linux` 64-bit.
18 | * `Python 3` (min. version 3.6), `pip3` (& `py` launcher Windows-only).
19 | * Working `Neo4j` Database (min. version 3.5.12).
20 |
21 | ### Windows 10
22 | Download the project from the green button above, unzip it,
23 | and then open a cmd terminal to this folder and type `pip3 install -r requirements.txt`.
24 | This command will install the necessary `Python` libraries\* to run the project.
25 |
26 | ### Debian Based Linux
27 | We ran the following commands to update `Python`, `git`,
28 | clone the project to a local folder and install the necessary `Python` libraries\*.
29 | ```bash
30 | sudo apt install python3.6
31 | sudo apt install git-all
32 | git clone https://github.com/NC0DER/GraphOfDocs
33 | cd GraphOfDocs
34 | pip3 install -r requirements.txt
35 | ```
36 | *\* Optionally you could create a virtual environment first,*
37 | *\* to isolate the libraries from your python user install.*
38 | *\* However the setup script doesn't downgrade existing libraries,*
39 | *\* so there's zero risk of affecting your local user install.*
40 |
41 | ## Database Setup (Windows / Linux)
42 | Create a new database from the `Neo4j` desktop app using 3.5.12 as the min. version.
43 | Update your memory settings to match the following values,
44 | and install the following extra plugins as depicted in the image.
45 | 
46 | *Hint: if you use a dedicated server that only runs `Neo4j`, you could increase these values,
47 | accordingly as specified in the comments of these parameters.*
48 |
49 | Run the `GraphOfDocs.py` script which will create thousands of nodes,
50 | and millions of relationships in the database.
51 | Once it's done, the database is initialized and ready for use.
52 |
53 | ## Running the app
54 | You could use the `Neo4j Browser` to run your queries,
55 | or for large queries you could use the custom visualization tool
56 | `visualize.html` which is located in the `GraphOfDocs` Subdirectory.
57 |
58 | ## Citation
59 | On a novel representation of multiple textual documents in a single graph (KES-IDT 2020) paper:
60 | ```
61 | Giarelis N., Kanakaris N., Karacapilidis N. (2020) On a Novel Representation of Multiple Textual Documents in a Single Graph. In: Czarnowski I., Howlett R., Jain L. (eds) Intelligent Decision Technologies. IDT 2020. Smart Innovation, Systems and Technologies, vol 193. Springer, Singapore
62 | ```
63 |
64 | ```
65 | @InProceedings{10.1007/978-981-15-5925-9_9,
66 | author="Giarelis, Nikolaos
67 | and Kanakaris, Nikos
68 | and Karacapilidis, Nikos",
69 | editor="Czarnowski, Ireneusz
70 | and Howlett, Robert J.
71 | and Jain, Lakhmi C.",
72 | title="On a Novel Representation of Multiple Textual Documents in a Single Graph",
73 | booktitle="Intelligent Decision Technologies",
74 | year="2020",
75 | publisher="Springer Singapore",
76 | address="Singapore",
77 | pages="105--115",
78 | abstract="This paper introduces a novel approach to represent multiple documents as a single graph, namely, the graph-of-docs model, together with an associated novel algorithm for text categorization. The proposed approach enables the investigation of the importance of a term into a whole corpus of documents and supports the inclusion of relationship edges between documents, thus enabling the calculation of important metrics as far as documents are concerned. Compared to well-tried existing solutions, our initial experimentations demonstrate a significant improvement of the accuracy of the text categorization process. For the experimentations reported in this paper, we used a well-known dataset containing about 19,000 documents organized in various subjects.",
79 | isbn="978-981-15-5925-9"
80 | }
81 | ```
82 |
83 | An innovative graph-based approach to advance feature selection from multiple textual documents (AIAI 2020) paper:
84 | ```
85 | Giarelis N., Kanakaris N., Karacapilidis N. (2020) An Innovative Graph-Based Approach to Advance Feature Selection from Multiple Textual Documents. In: Maglogiannis I., Iliadis L., Pimenidis E. (eds) Artificial Intelligence Applications and Innovations. AIAI 2020. IFIP Advances in Information and Communication Technology, vol 583. Springer, Cham
86 | ```
87 |
88 | ```
89 | @InProceedings{10.1007/978-3-030-49161-1_9,
90 | author="Giarelis, Nikolaos
91 | and Kanakaris, Nikos
92 | and Karacapilidis, Nikos",
93 | editor="Maglogiannis, Ilias
94 | and Iliadis, Lazaros
95 | and Pimenidis, Elias",
96 | title="An Innovative Graph-Based Approach to Advance Feature Selection from Multiple Textual Documents",
97 | booktitle="Artificial Intelligence Applications and Innovations",
98 | year="2020",
99 | publisher="Springer International Publishing",
100 | address="Cham",
101 | pages="96--106",
102 | abstract="This paper introduces a novel graph-based approach to select features from multiple textual documents. The proposed solution enables the investigation of the importance of a term into a whole corpus of documents by utilizing contemporary graph theory methods, such as community detection algorithms and node centrality measures. Compared to well-tried existing solutions, evaluation results show that the proposed approach increases the accuracy of most text classifiers employed and decreases the number of features required to achieve `state-of-the-art' accuracy. Well-known datasets used for the experimentations reported in this paper include 20Newsgroups, LingSpam, Amazon Reviews and Reuters.",
103 | isbn="978-3-030-49161-1"
104 | }
105 | ```
106 |
107 | ## Contributors
108 | * Nikolaos Giarelis (giarelis@ceid.upatras.gr)
109 | * Nikos Kanakaris (nkanakaris@upnet.gr)
110 | * Nikos Karacapilidis (karacap@upatras.gr)
111 |
--------------------------------------------------------------------------------
/experiments.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | warnings.warn = lambda *args, **kwards: None # Supress warnings.
3 |
4 | import sys
5 | import pandas as pd
6 | from sklearn.model_selection import train_test_split
7 | from sklearn.preprocessing import LabelEncoder
8 | from sklearn.utils import shuffle
9 | from neo4j import ServiceUnavailable
10 | from GraphOfDocs.neo4j_wrapper import Neo4jDatabase
11 | from GraphOfDocs import utils
12 | from GraphOfDocs import select
13 | from prettytable import PrettyTable
14 | from GraphOfDocs import config_experiments
15 | from GraphOfDocs import evaluation
16 | import timeit
17 |
18 | results_table = PrettyTable(
19 | ['Method', 'Accuracy', 'Number of features',
20 | 'Train size', 'Test size', 'Details'])
21 | evaluation_results = []
22 | feature_selection_evaluation_results = []
23 |
24 | start_time = timeit.default_timer()
25 | print('')
26 | print('%'*100)
27 | print('!START OF THE EXPERIMENT!')
28 | #print('DATASET DIR PATH: %s' % config_experiments.DATASET_PATH)
29 | print(f'DATASET DIR PATH: {config_experiments.DATASET_PATH}')
30 | print('MIN NUMBER OF DOCUMENTS PER SELECTED COMMUNITY: '
31 | f'{config_experiments.MIN_NUMBER_OF_DOCUMENTS_PER_SELECTED_COMMUNITY}')
32 | print('VARIANCE THRESHOLD: '
33 | f'{config_experiments.VARIANCE_THRESHOLD}')
34 | print('SELECT KBEST K: '
35 | f'{config_experiments.SELECT_KBEST_K}')
36 | print('TOP N SELECTED COMMUNITY TERMS: '
37 | f'{config_experiments.TOP_N_SELECTED_COMMUNITY_TERMS}')
38 |
39 | # Connect to database.
40 | try:
41 | database = Neo4jDatabase('bolt://localhost:7687', 'neo4j', '123')
42 | # Neo4j server is unavailable.
43 | # This client app cannot open a connection.
44 | except ServiceUnavailable as error:
45 | print('\t* Neo4j database is unavailable.')
46 | print('\t* Please check the database connection before running this app.')
47 | input('\t* Press any key to exit the app...')
48 | sys.exit(1)
49 | # Retrieve the communities of documents and their filenames.
50 | doc_communities = select.get_communities_filenames(database)
51 | # Keep only the communities that contain at least the configured minimum number of documents.
52 | filtered_doc_communities = \
53 | [doc_community for doc_community in doc_communities
54 | if doc_community[2] >=
55 | config_experiments.MIN_NUMBER_OF_DOCUMENTS_PER_SELECTED_COMMUNITY]
56 | # Fetch the selected documents.
57 | selected_docs = sum([docs for _, docs, _ in filtered_doc_communities], [])
58 | # Map community id to documents.
59 | doc_communities_dict = {community_id: docs
60 | for community_id, docs, number_of_docs
61 | in filtered_doc_communities}
62 | # Map document to community id.
63 | doc_to_community_dict = {doc: community_id
64 | for community_id, doc_community, _
65 | in filtered_doc_communities for doc in doc_community}
66 | print(f'Number of selected documents: {len(selected_docs)}')
67 | # Read dataset, clean dataset and create a pandas dataframe of the dataset.
68 | dataset = utils.read_dataset(config_experiments.DATASET_PATH)
69 | # Create a label encoder (map classes to integer numbers).
70 | le = LabelEncoder()
71 | # The class of each document is derived from its filename, by splitting it on the '_' character. E.g. comp.sys.mac.hardware_51712.
72 | le.fit([config_experiments.extract_file_class(file[0]) for file in dataset])
73 | # Tuple: file identifier, file class, file class number, file text.
74 | clean_dataset = [(file[0],
75 | config_experiments.extract_file_class(file[0]),
76 | le.transform([config_experiments.extract_file_class(file[0])])[0],
77 | ' '.join(utils.generate_words(
78 | file[1],
79 | extend_window = True,
80 | insert_stopwords = False,
81 | lemmatize = False, stem = False)))
82 | for file in dataset]
83 | df = pd.DataFrame(clean_dataset,
84 | columns = ['identifier', 'class', 'class_number', 'text'])
85 | df_all = df
86 |
87 | # Keep only the selected documents (i.e. the documents that belong to the communities kept above).
88 | df = df[df['identifier'].isin(selected_docs)]
89 | df = shuffle(df, random_state = 42)
90 | print('EXAMPLE OF THE PANDAS DATAFRAME')
91 | print(df.head(2))
92 |
93 | # Number of unique classes
94 | print(f'Number of unique classes: {le.classes_.shape}')
95 |
96 | X = df['text']
97 | y = df['class_number']
98 | positions = [i for i in range(len(X))]
99 | positions_train, positions_test = train_test_split(
100 | positions, test_size =0.33, random_state = 42)
101 |
102 | res = evaluation.BOWEvaluator()\
103 | .evaluate(X, y, results_table = results_table,
104 | classifiers = config_experiments.classifiers)
105 | evaluation_results.extend(res)
106 |
107 | res = evaluation.MetaFeatureSelectionEvaluator()\
108 | .evaluate(X, y, results_table = results_table,
109 | classifiers = config_experiments.classifiers)
110 | evaluation_results.extend(res)
111 |
112 | for variance_threshold in config_experiments.VARIANCE_THRESHOLD:
113 | res = evaluation\
114 | .LowVarianceFeatureSelectionEvaluator(
115 | variance_threshold=variance_threshold)\
116 | .evaluate(X, y, results_table = results_table,
117 | classifiers = config_experiments.classifiers)
118 | evaluation_results.extend(res)
119 | feature_selection_evaluation_results.extend(res)
120 |
121 | for kbest_k in config_experiments.SELECT_KBEST_K:
122 | res = evaluation\
123 | .SelectKBestFeatureSelectionEvaluator(
124 | kbest=kbest_k)\
125 | .evaluate(X, y, results_table = results_table,
126 | classifiers=config_experiments.classifiers)
127 | evaluation_results.extend(res)
128 | feature_selection_evaluation_results.extend(res)
129 |
130 | evaluation.GraphOfDocsClassifier(
131 | doc_to_community_dict, doc_communities_dict)\
132 | .calculate_accuracy(df['identifier'], results_table = results_table)
133 |
134 | for top_n in config_experiments.TOP_N_SELECTED_COMMUNITY_TERMS:
135 | res = evaluation\
136 | .TopNOfEachCommunityEvaluator(
137 | top_n, doc_to_community_dict,
138 | doc_communities_dict)\
139 | .evaluate(
140 | X, y, df = df,
141 | positions_train = positions_train,
142 | database = database,
143 | results_table = results_table,
144 | classifiers = config_experiments.classifiers)
145 | evaluation_results.extend(res)
146 | feature_selection_evaluation_results.extend(res)
147 |
148 | df_evaluation_results = pd.DataFrame(evaluation_results)
149 | df_feature_selection_evaluation_results = \
150 | pd.DataFrame(feature_selection_evaluation_results)
151 | print('EXAMPLE OF THE EVALUATION RESULTS PANDAS DATAFRAME')
152 | print(df_evaluation_results.head(2))
153 |
154 | results_table.sortby = 'Accuracy'
155 | results_table.reversesort = True
156 | print(results_table)
157 |
158 | output_dir = config_experiments.EXPERIMENTAL_RESULTS_OUΤPUT_DIR
159 | plots_prefix = config_experiments.PLOTS_PREFIX
160 | df_evaluation_results.to_csv(f'{output_dir}/{plots_prefix}_evaluation_results.csv')
161 | evaluation.generate_plots(
162 | df_feature_selection_evaluation_results,
163 | output_dir = output_dir,
164 | plots_prefix = f'{plots_prefix}_feature_selection',
165 | show_only = False)
166 |
167 | database.close()
168 | stop_time = timeit.default_timer()
169 | print(f'Execution time: {stop_time - start_time}')
170 | print('!END OF THE EXPERIMENT!')
171 | print('%'*100)
172 | print('')
173 |
--------------------------------------------------------------------------------
/GraphOfDocs/create.py:
--------------------------------------------------------------------------------
1 | """
2 | This script contains functions that
3 | create data in the Neo4j database.
4 | """
5 | import platform
6 | from GraphOfDocs.utils import clear_screen
7 | from GraphOfDocs.algos import *
8 | from GraphOfDocs.select import get_communities_filenames, get_communities_tags
9 |
# Global dictionary that maps an edge, i.e. a (current term, next term) tuple,
# to the number of its co-occurrences found so far, across all documents.
edges = {}
# Global list of the terms that already have a Word node.
# We are using a list to preserve order of appearance.
nodes = []

def create_graph_of_words(words, database, filename, window_size = 4):
    """
    Function that creates a Graph of Words that contains all nodes from each document for easy comparison,
    inside the neo4j database, using the appropriate cypher queries.

    words:       list of the document terms, which may contain the end-of-sentence token 'e5c'.
    database:    object whose execute(query, mode) method runs a cypher query.
    filename:    name stored on the Document node of this graph of words.
    window_size: each term is connected with the (window_size - 1) terms that follow it,
                 without crossing an 'e5c' token.

    Returns the filename if the document was skipped (fewer words than window_size),
    otherwise None.
    """
    # Files that have word length < window size, are skipped.
    length = len(words)
    if length < window_size:
        # Early exit, we return the skipped filename.
        return filename

    # The global edges are shared between all graphs of words,
    # so that repeated co-occurrences update a weight instead of creating duplicates.
    global edges
    # The global nodes are shared for the same reason, to avoid duplicate Word nodes.
    global nodes

    # Unique terms of this document, in order of first appearance (dicts keep insertion order).
    # The end-of-sentence token is left out, so it doesn't get created as a node.
    terms = [term for term in dict.fromkeys(words) if term != 'e5c']

    # If the term doesn't exist as a node, then create it.
    for term in terms:
        if term not in nodes:
            database.execute(f'CREATE (w:Word {{key: "{term}"}})', 'w')
            # Remember the term globally, to avoid duplicate creation.
            nodes.append(term)

    # Work on a local copy, since the window shrinks near the end of the document.
    span = window_size
    # Create unique connections between existing nodes of the graph.
    for i, current in enumerate(words):
        # If there are fewer leftover items than the window size, reduce it.
        if i + span > length:
            span = span - 1
        # The end of sentence string is never connected to anything,
        # move on to the words of the next sentence.
        if current == 'e5c':
            continue
        # Connect the current element with the next elements inside the window.
        for j in range(1, span):
            following = words[i + j]
            # Reached the end of sentence string.
            # We can't connect words of different sentences,
            # therefore we go back to the outer loop to pick a new current word.
            if following == 'e5c':
                break
            # NOTE(review): the key is the ordered pair, so (a, b) and (b, a) are counted
            # separately, although the relationship pattern below is undirected.
            edge = (current, following)
            if edge in edges:
                # If the edge exists, just update its weight.
                edges[edge] = edges[edge] + 1
                query = (f'MATCH (w1:Word {{key: "{current}"}})-[r:connects]-(w2:Word {{key: "{following}"}}) '
                         f'SET r.weight = {edges[edge]}')
            else:
                # Else, create it, with a starting weight of 1 meaning first co-occurrence.
                edges[edge] = 1
                query = (f'MATCH (w1:Word {{key: "{current}"}}) '
                         f'MATCH (w2:Word {{key: "{following}"}}) '
                         f'MERGE (w1)-[r:connects {{weight: {edges[edge]}}}]-(w2)')
            # The query is executed in both cases of the if...else statement.
            database.execute(query, 'w')

    # Create a parent node that represents the document itself.
    # This node is connected to all words of its own graph,
    # and will be used for similarity/comparison queries.
    database.execute(f'CREATE (d:Document {{filename: "{filename}"}})', 'w')
    # The Python list of terms is formatted directly into the query as a list literal.
    query = (f'MATCH (w:Word) WHERE w.key IN {terms} '
             'WITH collect(w) as words '
             f'MATCH (d:Document {{filename: "{filename}"}}) '
             'UNWIND words as word '
             'CREATE (d)-[:includes]->(word)')
    database.execute(query, 'w')
    return
101 |
def run_initial_algorithms(database):
    """
    Function that prepares the word graph for analysis and visualization,
    by running a centrality algorithm (Pagerank) and then
    a community detection algorithm (Louvain) on it.
    Both are applied on the 'Word' nodes and their 'connects' relationships,
    and are asked to write their results under the names 'pagerank' and 'community'.
    """
    word_label, cooccurrence_rel = 'Word', 'connects'
    # Append the parameter 'weight' for the weighted version of the algorithm.
    # NOTE(review): 20 is presumably the number of pagerank iterations - confirm in algos.
    pagerank(database, word_label, cooccurrence_rel, 20, 'pagerank')
    louvain(database, word_label, cooccurrence_rel, 'community')
    return
114 |
def create_similarity_graph(database):
    """
    Function that (re)builds the similarity graph between documents.
    Existing 'is_similar' relationships are deleted first, then the
    Jaccard similarity measure is run for the 'Document' nodes over the 'Word' nodes
    they include, producing 'is_similar' relationships with a 'score' property.
    Finally, the Louvain community detection algorithm is run on the
    'Document' nodes and their 'is_similar' relationships,
    with 'community' as the name under which its result is written.
    """
    document_label, similarity_rel = 'Document', 'is_similar'

    # Remove similarity edges from previous iterations.
    database.execute('MATCH ()-[r:is_similar]->() DELETE r', 'w')

    # Create the similarity graph using Jaccard similarity measure.
    # NOTE(review): 0.23 is presumably the similarity cutoff - confirm in algos.
    jaccard(database, document_label, 'includes', 'Word', 0.23, similarity_rel, 'score')

    # Find all similar document communities.
    # Append the parameter 'score' for the weighted version of the algorithm.
    louvain(database, document_label, similarity_rel, 'community')
    print('Similarity graph created.')
    return
138 |
def create_clustering_tags(database, top_terms = 25):
    """
    This function creates, in the Neo4j database, the 'has_tag' relationships
    that connect the document nodes of each similarity community
    with the top clustering tags (existing Word nodes) of that community.

    database:  object whose execute(query, mode) method runs a cypher query.
    top_terms: passed to get_communities_tags, to limit the tags retrieved per community.

    Communities that have no entry in the retrieved tags are reported and skipped.
    """
    current_system = platform.system()
    # Remove has_tag edges from previous iterations.
    database.execute('MATCH ()-[r:has_tag]->() DELETE r', 'w')

    # Get all id numbers from communities and all their associated file(name)s.
    print('Loading all community ids and their filenames...')
    results = get_communities_filenames(database)

    # The communities are expected to be ordered by filecount, which means that after the
    # first one found with 1 file, all the rest contain a single document as well.
    # There is no reason to create tags in isolated communities, since there are no common tags
    # with other documents. Therefore everything from that position onwards is dropped.
    first_isolated = next(
        (position for position, result in enumerate(results) if result[2] == 1),
        len(results))
    results = results[:first_isolated]
    # Count all results (rows) for a simple loading screen.
    total_count = len(results)

    # Get all top tags for each community.
    top_tags = get_communities_tags(database, top_terms)

    for count, (community, filenames, _) in enumerate(results, start = 1):
        # Print the number of the currently processed community.
        print(f'Processing {count} out of {total_count} communities...' )
        try:
            tags = top_tags[community]
        except KeyError:
            print('\t* Error: Community key should exist in dictionary!')
            # Without tags there is nothing to connect. Moving on also prevents
            # reusing the tags of the previously processed community.
            continue

        # Connect filenames of a specific community with all their associated tags.
        # Tags are considered to be important words that describe that community,
        # and which already exist in the graphofdocs model.
        query = (f'UNWIND {filenames} AS filename '
                 'MATCH (d:Document {filename: filename}) '
                 f'UNWIND {tags} AS tag '
                 'MATCH (w:Word {key: tag}) '
                 'CREATE (d)-[r:has_tag]->(w)')
        database.execute(query, 'w')

        # Clear the screen so that the next progress line replaces this one.
        clear_screen(current_system)
    return
198 |
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_categories/AMAZON_evaluation_results.csv:
--------------------------------------------------------------------------------
1 | ,Method,Accuracy,Number of features,Train size,Test size,Classifier,variance thershold,kbest,top_n
2 | 0,BOW+NB,0.9838,9771,623,308,NB,,,
3 | 1,BOW+LR,0.9838,9771,623,308,LR,,,
4 | 2,BOW+5NN,0.6039,9771,623,308,5NN,,,
5 | 3,BOW+2NN,0.7532,9771,623,308,2NN,,,
6 | 4,BOW+1KNN,0.75,9771,623,308,1KNN,,,
7 | 5,BOW+LSVM,0.9935,9771,623,308,LSVM,,,
8 | 6,BOW+NN100x50,0.9773,9771,623,308,NN100x50,,,
9 | 7,META+NB,0.987,2935,623,308,NB,,,
10 | 8,META+LR,0.9838,2935,623,308,LR,,,
11 | 9,META+5NN,0.6656,2935,623,308,5NN,,,
12 | 10,META+2NN,0.8344,2935,623,308,2NN,,,
13 | 11,META+1KNN,0.8279,2935,623,308,1KNN,,,
14 | 12,META+LSVM,0.9935,2935,623,308,LSVM,,,
15 | 13,META+NN100x50,0.987,2935,623,308,NN100x50,,,
16 | 14,LVAR+NB,0.9838,9771,623,308,NB,0.0005,,
17 | 15,LVAR+LR,0.9838,9771,623,308,LR,0.0005,,
18 | 16,LVAR+5NN,0.6039,9771,623,308,5NN,0.0005,,
19 | 17,LVAR+2NN,0.7532,9771,623,308,2NN,0.0005,,
20 | 18,LVAR+1KNN,0.75,9771,623,308,1KNN,0.0005,,
21 | 19,LVAR+LSVM,0.9935,9771,623,308,LSVM,0.0005,,
22 | 20,LVAR+NN100x50,0.9773,9771,623,308,NN100x50,0.0005,,
23 | 21,LVAR+NB,0.9838,9771,623,308,NB,0.001,,
24 | 22,LVAR+LR,0.9838,9771,623,308,LR,0.001,,
25 | 23,LVAR+5NN,0.6039,9771,623,308,5NN,0.001,,
26 | 24,LVAR+2NN,0.7532,9771,623,308,2NN,0.001,,
27 | 25,LVAR+1KNN,0.75,9771,623,308,1KNN,0.001,,
28 | 26,LVAR+LSVM,0.9935,9771,623,308,LSVM,0.001,,
29 | 27,LVAR+NN100x50,0.9773,9771,623,308,NN100x50,0.001,,
30 | 28,LVAR+NB,0.9838,9771,623,308,NB,0.0015,,
31 | 29,LVAR+LR,0.9838,9771,623,308,LR,0.0015,,
32 | 30,LVAR+5NN,0.6039,9771,623,308,5NN,0.0015,,
33 | 31,LVAR+2NN,0.7532,9771,623,308,2NN,0.0015,,
34 | 32,LVAR+1KNN,0.75,9771,623,308,1KNN,0.0015,,
35 | 33,LVAR+LSVM,0.9935,9771,623,308,LSVM,0.0015,,
36 | 34,LVAR+NN100x50,0.9773,9771,623,308,NN100x50,0.0015,,
37 | 35,LVAR+NB,0.9838,6000,623,308,NB,0.002,,
38 | 36,LVAR+LR,0.9838,6000,623,308,LR,0.002,,
39 | 37,LVAR+5NN,0.5942,6000,623,308,5NN,0.002,,
40 | 38,LVAR+2NN,0.7305,6000,623,308,2NN,0.002,,
41 | 39,LVAR+1KNN,0.7273,6000,623,308,1KNN,0.002,,
42 | 40,LVAR+LSVM,1.0,6000,623,308,LSVM,0.002,,
43 | 41,LVAR+NN100x50,0.9838,6000,623,308,NN100x50,0.002,,
44 | 42,LVAR+NB,0.9838,6000,623,308,NB,0.003,,
45 | 43,LVAR+LR,0.9838,6000,623,308,LR,0.003,,
46 | 44,LVAR+5NN,0.5942,6000,623,308,5NN,0.003,,
47 | 45,LVAR+2NN,0.7305,6000,623,308,2NN,0.003,,
48 | 46,LVAR+1KNN,0.7273,6000,623,308,1KNN,0.003,,
49 | 47,LVAR+LSVM,1.0,6000,623,308,LSVM,0.003,,
50 | 48,LVAR+NN100x50,0.9838,6000,623,308,NN100x50,0.003,,
51 | 49,LVAR+NB,0.9935,3637,623,308,NB,0.004,,
52 | 50,LVAR+LR,0.9935,3637,623,308,LR,0.004,,
53 | 51,LVAR+5NN,0.6331,3637,623,308,5NN,0.004,,
54 | 52,LVAR+2NN,0.776,3637,623,308,2NN,0.004,,
55 | 53,LVAR+1KNN,0.7727,3637,623,308,1KNN,0.004,,
56 | 54,LVAR+LSVM,1.0,3637,623,308,LSVM,0.004,,
57 | 55,LVAR+NN100x50,0.9805,3637,623,308,NN100x50,0.004,,
58 | 56,LVAR+NB,0.9903,2906,623,308,NB,0.005,,
59 | 57,LVAR+LR,0.9935,2906,623,308,LR,0.005,,
60 | 58,LVAR+5NN,0.6883,2906,623,308,5NN,0.005,,
61 | 59,LVAR+2NN,0.7955,2906,623,308,2NN,0.005,,
62 | 60,LVAR+1KNN,0.7825,2906,623,308,1KNN,0.005,,
63 | 61,LVAR+LSVM,1.0,2906,623,308,LSVM,0.005,,
64 | 62,LVAR+NN100x50,0.9805,2906,623,308,NN100x50,0.005,,
65 | 63,LVAR+NB,0.9935,1719,623,308,NB,0.01,,
66 | 64,LVAR+LR,0.9935,1719,623,308,LR,0.01,,
67 | 65,LVAR+5NN,0.6916,1719,623,308,5NN,0.01,,
68 | 66,LVAR+2NN,0.8084,1719,623,308,2NN,0.01,,
69 | 67,LVAR+1KNN,0.7597,1719,623,308,1KNN,0.01,,
70 | 68,LVAR+LSVM,1.0,1719,623,308,LSVM,0.01,,
71 | 69,LVAR+NN100x50,0.9838,1719,623,308,NN100x50,0.01,,
72 | 70,KBEST+NB,1.0,350,623,308,NB,,350.0,
73 | 71,KBEST+LR,0.9935,350,623,308,LR,,350.0,
74 | 72,KBEST+5NN,0.9513,350,623,308,5NN,,350.0,
75 | 73,KBEST+2NN,0.9838,350,623,308,2NN,,350.0,
76 | 74,KBEST+1KNN,0.9773,350,623,308,1KNN,,350.0,
77 | 75,KBEST+LSVM,1.0,350,623,308,LSVM,,350.0,
78 | 76,KBEST+NN100x50,1.0,350,623,308,NN100x50,,350.0,
79 | 77,KBEST+NB,1.0,500,623,308,NB,,500.0,
80 | 78,KBEST+LR,0.9935,500,623,308,LR,,500.0,
81 | 79,KBEST+5NN,0.8864,500,623,308,5NN,,500.0,
82 | 80,KBEST+2NN,0.9578,500,623,308,2NN,,500.0,
83 | 81,KBEST+1KNN,0.9221,500,623,308,1KNN,,500.0,
84 | 82,KBEST+LSVM,1.0,500,623,308,LSVM,,500.0,
85 | 83,KBEST+NN100x50,0.9935,500,623,308,NN100x50,,500.0,
86 | 84,KBEST+NB,0.9968,1000,623,308,NB,,1000.0,
87 | 85,KBEST+LR,0.9903,1000,623,308,LR,,1000.0,
88 | 86,KBEST+5NN,0.8052,1000,623,308,5NN,,1000.0,
89 | 87,KBEST+2NN,0.9091,1000,623,308,2NN,,1000.0,
90 | 88,KBEST+1KNN,0.8994,1000,623,308,1KNN,,1000.0,
91 | 89,KBEST+LSVM,1.0,1000,623,308,LSVM,,1000.0,
92 | 90,KBEST+NN100x50,0.9805,1000,623,308,NN100x50,,1000.0,
93 | 91,KBEST+NB,0.9838,2000,623,308,NB,,2000.0,
94 | 92,KBEST+LR,0.9838,2000,623,308,LR,,2000.0,
95 | 93,KBEST+5NN,0.7078,2000,623,308,5NN,,2000.0,
96 | 94,KBEST+2NN,0.8247,2000,623,308,2NN,,2000.0,
97 | 95,KBEST+1KNN,0.8019,2000,623,308,1KNN,,2000.0,
98 | 96,KBEST+LSVM,0.9935,2000,623,308,LSVM,,2000.0,
99 | 97,KBEST+NN100x50,0.974,2000,623,308,NN100x50,,2000.0,
100 | 98,KBEST+NB,0.987,3000,623,308,NB,,3000.0,
101 | 99,KBEST+LR,0.9838,3000,623,308,LR,,3000.0,
102 | 100,KBEST+5NN,0.6558,3000,623,308,5NN,,3000.0,
103 | 101,KBEST+2NN,0.7695,3000,623,308,2NN,,3000.0,
104 | 102,KBEST+1KNN,0.7565,3000,623,308,1KNN,,3000.0,
105 | 103,KBEST+LSVM,0.9935,3000,623,308,LSVM,,3000.0,
106 | 104,KBEST+NN100x50,0.9773,3000,623,308,NN100x50,,3000.0,
107 | 105,KBEST+NB,0.987,4000,623,308,NB,,4000.0,
108 | 106,KBEST+LR,0.9838,4000,623,308,LR,,4000.0,
109 | 107,KBEST+5NN,0.5844,4000,623,308,5NN,,4000.0,
110 | 108,KBEST+2NN,0.7435,4000,623,308,2NN,,4000.0,
111 | 109,KBEST+1KNN,0.7403,4000,623,308,1KNN,,4000.0,
112 | 110,KBEST+LSVM,0.9935,4000,623,308,LSVM,,4000.0,
113 | 111,KBEST+NN100x50,0.974,4000,623,308,NN100x50,,4000.0,
114 | 112,KBEST+NB,0.987,5000,623,308,NB,,5000.0,
115 | 113,KBEST+LR,0.9838,5000,623,308,LR,,5000.0,
116 | 114,KBEST+5NN,0.5844,5000,623,308,5NN,,5000.0,
117 | 115,KBEST+2NN,0.7305,5000,623,308,2NN,,5000.0,
118 | 116,KBEST+1KNN,0.7273,5000,623,308,1KNN,,5000.0,
119 | 117,KBEST+LSVM,0.9935,5000,623,308,LSVM,,5000.0,
120 | 118,KBEST+NN100x50,0.9773,5000,623,308,NN100x50,,5000.0,
121 | 119,KBEST+NB,0.987,6000,623,308,NB,,6000.0,
122 | 120,KBEST+LR,0.9838,6000,623,308,LR,,6000.0,
123 | 121,KBEST+5NN,0.5682,6000,623,308,5NN,,6000.0,
124 | 122,KBEST+2NN,0.7468,6000,623,308,2NN,,6000.0,
125 | 123,KBEST+1KNN,0.7305,6000,623,308,1KNN,,6000.0,
126 | 124,KBEST+LSVM,0.9935,6000,623,308,LSVM,,6000.0,
127 | 125,KBEST+NN100x50,0.974,6000,623,308,NN100x50,,6000.0,
128 | 126,KBEST+NB,0.987,7000,623,308,NB,,7000.0,
129 | 127,KBEST+LR,0.9838,7000,623,308,LR,,7000.0,
130 | 128,KBEST+5NN,0.5325,7000,623,308,5NN,,7000.0,
131 | 129,KBEST+2NN,0.7208,7000,623,308,2NN,,7000.0,
132 | 130,KBEST+1KNN,0.711,7000,623,308,1KNN,,7000.0,
133 | 131,KBEST+LSVM,0.9935,7000,623,308,LSVM,,7000.0,
134 | 132,KBEST+NN100x50,0.9773,7000,623,308,NN100x50,,7000.0,
135 | 133,TOPN+NB,0.9903,372,623,308,NB,,,5.0
136 | 134,TOPN+LR,0.9903,372,623,308,LR,,,5.0
137 | 135,TOPN+5NN,0.9805,372,623,308,5NN,,,5.0
138 | 136,TOPN+2NN,0.9805,372,623,308,2NN,,,5.0
139 | 137,TOPN+1KNN,0.9708,372,623,308,1KNN,,,5.0
140 | 138,TOPN+LSVM,0.9935,372,623,308,LSVM,,,5.0
141 | 139,TOPN+NN100x50,0.9935,372,623,308,NN100x50,,,5.0
142 | 140,TOPN+NB,0.9935,1065,623,308,NB,,,10.0
143 | 141,TOPN+LR,0.9935,1065,623,308,LR,,,10.0
144 | 142,TOPN+5NN,0.9318,1065,623,308,5NN,,,10.0
145 | 143,TOPN+2NN,0.9513,1065,623,308,2NN,,,10.0
146 | 144,TOPN+1KNN,0.9448,1065,623,308,1KNN,,,10.0
147 | 145,TOPN+LSVM,1.0,1065,623,308,LSVM,,,10.0
148 | 146,TOPN+NN100x50,0.9935,1065,623,308,NN100x50,,,10.0
149 | 147,TOPN+NB,0.9935,1557,623,308,NB,,,15.0
150 | 148,TOPN+LR,0.9935,1557,623,308,LR,,,15.0
151 | 149,TOPN+5NN,0.7955,1557,623,308,5NN,,,15.0
152 | 150,TOPN+2NN,0.8701,1557,623,308,2NN,,,15.0
153 | 151,TOPN+1KNN,0.8474,1557,623,308,1KNN,,,15.0
154 | 152,TOPN+LSVM,1.0,1557,623,308,LSVM,,,15.0
155 | 153,TOPN+NN100x50,0.9935,1557,623,308,NN100x50,,,15.0
156 | 154,TOPN+NB,0.9935,1940,623,308,NB,,,20.0
157 | 155,TOPN+LR,0.9935,1940,623,308,LR,,,20.0
158 | 156,TOPN+5NN,0.7175,1940,623,308,5NN,,,20.0
159 | 157,TOPN+2NN,0.8182,1940,623,308,2NN,,,20.0
160 | 158,TOPN+1KNN,0.8149,1940,623,308,1KNN,,,20.0
161 | 159,TOPN+LSVM,1.0,1940,623,308,LSVM,,,20.0
162 | 160,TOPN+NN100x50,0.9968,1940,623,308,NN100x50,,,20.0
163 | 161,TOPN+NB,0.987,2372,623,308,NB,,,25.0
164 | 162,TOPN+LR,0.9935,2372,623,308,LR,,,25.0
165 | 163,TOPN+5NN,0.6656,2372,623,308,5NN,,,25.0
166 | 164,TOPN+2NN,0.7922,2372,623,308,2NN,,,25.0
167 | 165,TOPN+1KNN,0.7922,2372,623,308,1KNN,,,25.0
168 | 166,TOPN+LSVM,1.0,2372,623,308,LSVM,,,25.0
169 | 167,TOPN+NN100x50,0.9903,2372,623,308,NN100x50,,,25.0
170 | 168,TOPN+NB,0.9838,4897,623,308,NB,,,50.0
171 | 169,TOPN+LR,0.9838,4897,623,308,LR,,,50.0
172 | 170,TOPN+5NN,0.6201,4897,623,308,5NN,,,50.0
173 | 171,TOPN+2NN,0.7825,4897,623,308,2NN,,,50.0
174 | 172,TOPN+1KNN,0.776,4897,623,308,1KNN,,,50.0
175 | 173,TOPN+LSVM,1.0,4897,623,308,LSVM,,,50.0
176 | 174,TOPN+NN100x50,0.987,4897,623,308,NN100x50,,,50.0
177 | 175,TOPN+NB,0.9838,5777,623,308,NB,,,100.0
178 | 176,TOPN+LR,0.9838,5777,623,308,LR,,,100.0
179 | 177,TOPN+5NN,0.6201,5777,623,308,5NN,,,100.0
180 | 178,TOPN+2NN,0.7727,5777,623,308,2NN,,,100.0
181 | 179,TOPN+1KNN,0.7662,5777,623,308,1KNN,,,100.0
182 | 180,TOPN+LSVM,0.9968,5777,623,308,LSVM,,,100.0
183 | 181,TOPN+NN100x50,0.987,5777,623,308,NN100x50,,,100.0
184 | 182,TOPN+NB,0.9838,6692,623,308,NB,,,250.0
185 | 183,TOPN+LR,0.9838,6692,623,308,LR,,,250.0
186 | 184,TOPN+5NN,0.6136,6692,623,308,5NN,,,250.0
187 | 185,TOPN+2NN,0.763,6692,623,308,2NN,,,250.0
188 | 186,TOPN+1KNN,0.7597,6692,623,308,1KNN,,,250.0
189 | 187,TOPN+LSVM,0.9968,6692,623,308,LSVM,,,250.0
190 | 188,TOPN+NN100x50,0.9773,6692,623,308,NN100x50,,,250.0
191 | 189,TOPN+NB,0.9838,7005,623,308,NB,,,500.0
192 | 190,TOPN+LR,0.9838,7005,623,308,LR,,,500.0
193 | 191,TOPN+5NN,0.6169,7005,623,308,5NN,,,500.0
194 | 192,TOPN+2NN,0.763,7005,623,308,2NN,,,500.0
195 | 193,TOPN+1KNN,0.7597,7005,623,308,1KNN,,,500.0
196 | 194,TOPN+LSVM,0.9968,7005,623,308,LSVM,,,500.0
197 | 195,TOPN+NN100x50,0.9773,7005,623,308,NN100x50,,,500.0
198 |
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/amazon_sentiment/AMAZON_evaluation_results.csv:
--------------------------------------------------------------------------------
1 | ,Method,Accuracy,Number of features,Train size,Test size,Classifier,variance thershold,kbest,top_n
2 | 0,BOW+NB,0.7208,9771,623,308,NB,,,
3 | 1,BOW+LR,0.763,9771,623,308,LR,,,
4 | 2,BOW+5NN,0.6169,9771,623,308,5NN,,,
5 | 3,BOW+2NN,0.6526,9771,623,308,2NN,,,
6 | 4,BOW+1KNN,0.6429,9771,623,308,1KNN,,,
7 | 5,BOW+LSVM,0.763,9771,623,308,LSVM,,,
8 | 6,BOW+NN100x50,0.7273,9771,623,308,NN100x50,,,
9 | 7,META+NB,0.737,2731,623,308,NB,,,
10 | 8,META+LR,0.789,2731,623,308,LR,,,
11 | 9,META+5NN,0.6623,2731,623,308,5NN,,,
12 | 10,META+2NN,0.6623,2731,623,308,2NN,,,
13 | 11,META+1KNN,0.6558,2731,623,308,1KNN,,,
14 | 12,META+LSVM,0.7727,2731,623,308,LSVM,,,
15 | 13,META+NN100x50,0.75,2731,623,308,NN100x50,,,
16 | 14,LVAR+NB,0.7208,9771,623,308,NB,0.0005,,
17 | 15,LVAR+LR,0.763,9771,623,308,LR,0.0005,,
18 | 16,LVAR+5NN,0.6169,9771,623,308,5NN,0.0005,,
19 | 17,LVAR+2NN,0.6526,9771,623,308,2NN,0.0005,,
20 | 18,LVAR+1KNN,0.6429,9771,623,308,1KNN,0.0005,,
21 | 19,LVAR+LSVM,0.763,9771,623,308,LSVM,0.0005,,
22 | 20,LVAR+NN100x50,0.7273,9771,623,308,NN100x50,0.0005,,
23 | 21,LVAR+NB,0.7208,9771,623,308,NB,0.001,,
24 | 22,LVAR+LR,0.763,9771,623,308,LR,0.001,,
25 | 23,LVAR+5NN,0.6169,9771,623,308,5NN,0.001,,
26 | 24,LVAR+2NN,0.6526,9771,623,308,2NN,0.001,,
27 | 25,LVAR+1KNN,0.6429,9771,623,308,1KNN,0.001,,
28 | 26,LVAR+LSVM,0.763,9771,623,308,LSVM,0.001,,
29 | 27,LVAR+NN100x50,0.7273,9771,623,308,NN100x50,0.001,,
30 | 28,LVAR+NB,0.7208,9771,623,308,NB,0.0015,,
31 | 29,LVAR+LR,0.763,9771,623,308,LR,0.0015,,
32 | 30,LVAR+5NN,0.6169,9771,623,308,5NN,0.0015,,
33 | 31,LVAR+2NN,0.6526,9771,623,308,2NN,0.0015,,
34 | 32,LVAR+1KNN,0.6429,9771,623,308,1KNN,0.0015,,
35 | 33,LVAR+LSVM,0.763,9771,623,308,LSVM,0.0015,,
36 | 34,LVAR+NN100x50,0.7273,9771,623,308,NN100x50,0.0015,,
37 | 35,LVAR+NB,0.724,6000,623,308,NB,0.002,,
38 | 36,LVAR+LR,0.7695,6000,623,308,LR,0.002,,
39 | 37,LVAR+5NN,0.6721,6000,623,308,5NN,0.002,,
40 | 38,LVAR+2NN,0.6688,6000,623,308,2NN,0.002,,
41 | 39,LVAR+1KNN,0.6688,6000,623,308,1KNN,0.002,,
42 | 40,LVAR+LSVM,0.7662,6000,623,308,LSVM,0.002,,
43 | 41,LVAR+NN100x50,0.7435,6000,623,308,NN100x50,0.002,,
44 | 42,LVAR+NB,0.724,6000,623,308,NB,0.003,,
45 | 43,LVAR+LR,0.7695,6000,623,308,LR,0.003,,
46 | 44,LVAR+5NN,0.6721,6000,623,308,5NN,0.003,,
47 | 45,LVAR+2NN,0.6688,6000,623,308,2NN,0.003,,
48 | 46,LVAR+1KNN,0.6688,6000,623,308,1KNN,0.003,,
49 | 47,LVAR+LSVM,0.7662,6000,623,308,LSVM,0.003,,
50 | 48,LVAR+NN100x50,0.7435,6000,623,308,NN100x50,0.003,,
51 | 49,LVAR+NB,0.7338,3637,623,308,NB,0.004,,
52 | 50,LVAR+LR,0.7792,3637,623,308,LR,0.004,,
53 | 51,LVAR+5NN,0.6331,3637,623,308,5NN,0.004,,
54 | 52,LVAR+2NN,0.6721,3637,623,308,2NN,0.004,,
55 | 53,LVAR+1KNN,0.6818,3637,623,308,1KNN,0.004,,
56 | 54,LVAR+LSVM,0.7695,3637,623,308,LSVM,0.004,,
57 | 55,LVAR+NN100x50,0.7468,3637,623,308,NN100x50,0.004,,
58 | 56,LVAR+NB,0.737,2906,623,308,NB,0.005,,
59 | 57,LVAR+LR,0.7727,2906,623,308,LR,0.005,,
60 | 58,LVAR+5NN,0.6656,2906,623,308,5NN,0.005,,
61 | 59,LVAR+2NN,0.6818,2906,623,308,2NN,0.005,,
62 | 60,LVAR+1KNN,0.6883,2906,623,308,1KNN,0.005,,
63 | 61,LVAR+LSVM,0.763,2906,623,308,LSVM,0.005,,
64 | 62,LVAR+NN100x50,0.7403,2906,623,308,NN100x50,0.005,,
65 | 63,LVAR+NB,0.7338,1719,623,308,NB,0.01,,
66 | 64,LVAR+LR,0.7727,1719,623,308,LR,0.01,,
67 | 65,LVAR+5NN,0.7013,1719,623,308,5NN,0.01,,
68 | 66,LVAR+2NN,0.6818,1719,623,308,2NN,0.01,,
69 | 67,LVAR+1KNN,0.6916,1719,623,308,1KNN,0.01,,
70 | 68,LVAR+LSVM,0.7825,1719,623,308,LSVM,0.01,,
71 | 69,LVAR+NN100x50,0.75,1719,623,308,NN100x50,0.01,,
72 | 70,KBEST+NB,0.7403,350,623,308,NB,,350.0,
73 | 71,KBEST+LR,0.724,350,623,308,LR,,350.0,
74 | 72,KBEST+5NN,0.724,350,623,308,5NN,,350.0,
75 | 73,KBEST+2NN,0.6851,350,623,308,2NN,,350.0,
76 | 74,KBEST+1KNN,0.6786,350,623,308,1KNN,,350.0,
77 | 75,KBEST+LSVM,0.711,350,623,308,LSVM,,350.0,
78 | 76,KBEST+NN100x50,0.7338,350,623,308,NN100x50,,350.0,
79 | 77,KBEST+NB,0.75,500,623,308,NB,,500.0,
80 | 78,KBEST+LR,0.724,500,623,308,LR,,500.0,
81 | 79,KBEST+5NN,0.711,500,623,308,5NN,,500.0,
82 | 80,KBEST+2NN,0.6883,500,623,308,2NN,,500.0,
83 | 81,KBEST+1KNN,0.6916,500,623,308,1KNN,,500.0,
84 | 82,KBEST+LSVM,0.7338,500,623,308,LSVM,,500.0,
85 | 83,KBEST+NN100x50,0.7468,500,623,308,NN100x50,,500.0,
86 | 84,KBEST+NB,0.7305,1000,623,308,NB,,1000.0,
87 | 85,KBEST+LR,0.7435,1000,623,308,LR,,1000.0,
88 | 86,KBEST+5NN,0.6721,1000,623,308,5NN,,1000.0,
89 | 87,KBEST+2NN,0.6688,1000,623,308,2NN,,1000.0,
90 | 88,KBEST+1KNN,0.6883,1000,623,308,1KNN,,1000.0,
91 | 89,KBEST+LSVM,0.7532,1000,623,308,LSVM,,1000.0,
92 | 90,KBEST+NN100x50,0.7565,1000,623,308,NN100x50,,1000.0,
93 | 91,KBEST+NB,0.737,2000,623,308,NB,,2000.0,
94 | 92,KBEST+LR,0.7662,2000,623,308,LR,,2000.0,
95 | 93,KBEST+5NN,0.6721,2000,623,308,5NN,,2000.0,
96 | 94,KBEST+2NN,0.6688,2000,623,308,2NN,,2000.0,
97 | 95,KBEST+1KNN,0.6753,2000,623,308,1KNN,,2000.0,
98 | 96,KBEST+LSVM,0.763,2000,623,308,LSVM,,2000.0,
99 | 97,KBEST+NN100x50,0.7468,2000,623,308,NN100x50,,2000.0,
100 | 98,KBEST+NB,0.7403,3000,623,308,NB,,3000.0,
101 | 99,KBEST+LR,0.7727,3000,623,308,LR,,3000.0,
102 | 100,KBEST+5NN,0.6656,3000,623,308,5NN,,3000.0,
103 | 101,KBEST+2NN,0.6331,3000,623,308,2NN,,3000.0,
104 | 102,KBEST+1KNN,0.6201,3000,623,308,1KNN,,3000.0,
105 | 103,KBEST+LSVM,0.7435,3000,623,308,LSVM,,3000.0,
106 | 104,KBEST+NN100x50,0.7338,3000,623,308,NN100x50,,3000.0,
107 | 105,KBEST+NB,0.737,4000,623,308,NB,,4000.0,
108 | 106,KBEST+LR,0.7468,4000,623,308,LR,,4000.0,
109 | 107,KBEST+5NN,0.6623,4000,623,308,5NN,,4000.0,
110 | 108,KBEST+2NN,0.6429,4000,623,308,2NN,,4000.0,
111 | 109,KBEST+1KNN,0.6526,4000,623,308,1KNN,,4000.0,
112 | 110,KBEST+LSVM,0.763,4000,623,308,LSVM,,4000.0,
113 | 111,KBEST+NN100x50,0.75,4000,623,308,NN100x50,,4000.0,
114 | 112,KBEST+NB,0.7435,5000,623,308,NB,,5000.0,
115 | 113,KBEST+LR,0.7597,5000,623,308,LR,,5000.0,
116 | 114,KBEST+5NN,0.6331,5000,623,308,5NN,,5000.0,
117 | 115,KBEST+2NN,0.6623,5000,623,308,2NN,,5000.0,
118 | 116,KBEST+1KNN,0.6623,5000,623,308,1KNN,,5000.0,
119 | 117,KBEST+LSVM,0.7532,5000,623,308,LSVM,,5000.0,
120 | 118,KBEST+NN100x50,0.724,5000,623,308,NN100x50,,5000.0,
121 | 119,KBEST+NB,0.737,6000,623,308,NB,,6000.0,
122 | 120,KBEST+LR,0.763,6000,623,308,LR,,6000.0,
123 | 121,KBEST+5NN,0.6494,6000,623,308,5NN,,6000.0,
124 | 122,KBEST+2NN,0.6494,6000,623,308,2NN,,6000.0,
125 | 123,KBEST+1KNN,0.6558,6000,623,308,1KNN,,6000.0,
126 | 124,KBEST+LSVM,0.763,6000,623,308,LSVM,,6000.0,
127 | 125,KBEST+NN100x50,0.724,6000,623,308,NN100x50,,6000.0,
128 | 126,KBEST+NB,0.7305,7000,623,308,NB,,7000.0,
129 | 127,KBEST+LR,0.7597,7000,623,308,LR,,7000.0,
130 | 128,KBEST+5NN,0.6818,7000,623,308,5NN,,7000.0,
131 | 129,KBEST+2NN,0.6883,7000,623,308,2NN,,7000.0,
132 | 130,KBEST+1KNN,0.6786,7000,623,308,1KNN,,7000.0,
133 | 131,KBEST+LSVM,0.7403,7000,623,308,LSVM,,7000.0,
134 | 132,KBEST+NN100x50,0.7305,7000,623,308,NN100x50,,7000.0,
135 | 133,TOPN+NB,0.7013,372,623,308,NB,,,5.0
136 | 134,TOPN+LR,0.6818,372,623,308,LR,,,5.0
137 | 135,TOPN+5NN,0.6688,372,623,308,5NN,,,5.0
138 | 136,TOPN+2NN,0.6883,372,623,308,2NN,,,5.0
139 | 137,TOPN+1KNN,0.6883,372,623,308,1KNN,,,5.0
140 | 138,TOPN+LSVM,0.6591,372,623,308,LSVM,,,5.0
141 | 139,TOPN+NN100x50,0.7013,372,623,308,NN100x50,,,5.0
142 | 140,TOPN+NB,0.6786,1065,623,308,NB,,,10.0
143 | 141,TOPN+LR,0.7565,1065,623,308,LR,,,10.0
144 | 142,TOPN+5NN,0.6786,1065,623,308,5NN,,,10.0
145 | 143,TOPN+2NN,0.6981,1065,623,308,2NN,,,10.0
146 | 144,TOPN+1KNN,0.711,1065,623,308,1KNN,,,10.0
147 | 145,TOPN+LSVM,0.737,1065,623,308,LSVM,,,10.0
148 | 146,TOPN+NN100x50,0.737,1065,623,308,NN100x50,,,10.0
149 | 147,TOPN+NB,0.6948,1557,623,308,NB,,,15.0
150 | 148,TOPN+LR,0.7565,1557,623,308,LR,,,15.0
151 | 149,TOPN+5NN,0.6688,1557,623,308,5NN,,,15.0
152 | 150,TOPN+2NN,0.6818,1557,623,308,2NN,,,15.0
153 | 151,TOPN+1KNN,0.6851,1557,623,308,1KNN,,,15.0
154 | 152,TOPN+LSVM,0.737,1557,623,308,LSVM,,,15.0
155 | 153,TOPN+NN100x50,0.7305,1557,623,308,NN100x50,,,15.0
156 | 154,TOPN+NB,0.7273,1940,623,308,NB,,,20.0
157 | 155,TOPN+LR,0.7662,1940,623,308,LR,,,20.0
158 | 156,TOPN+5NN,0.6331,1940,623,308,5NN,,,20.0
159 | 157,TOPN+2NN,0.6688,1940,623,308,2NN,,,20.0
160 | 158,TOPN+1KNN,0.6786,1940,623,308,1KNN,,,20.0
161 | 159,TOPN+LSVM,0.7532,1940,623,308,LSVM,,,20.0
162 | 160,TOPN+NN100x50,0.7403,1940,623,308,NN100x50,,,20.0
163 | 161,TOPN+NB,0.7208,2372,623,308,NB,,,25.0
164 | 162,TOPN+LR,0.7565,2372,623,308,LR,,,25.0
165 | 163,TOPN+5NN,0.6299,2372,623,308,5NN,,,25.0
166 | 164,TOPN+2NN,0.6526,2372,623,308,2NN,,,25.0
167 | 165,TOPN+1KNN,0.6623,2372,623,308,1KNN,,,25.0
168 | 166,TOPN+LSVM,0.7565,2372,623,308,LSVM,,,25.0
169 | 167,TOPN+NN100x50,0.7305,2372,623,308,NN100x50,,,25.0
170 | 168,TOPN+NB,0.7208,4897,623,308,NB,,,50.0
171 | 169,TOPN+LR,0.776,4897,623,308,LR,,,50.0
172 | 170,TOPN+5NN,0.6461,4897,623,308,5NN,,,50.0
173 | 171,TOPN+2NN,0.6753,4897,623,308,2NN,,,50.0
174 | 172,TOPN+1KNN,0.6851,4897,623,308,1KNN,,,50.0
175 | 173,TOPN+LSVM,0.763,4897,623,308,LSVM,,,50.0
176 | 174,TOPN+NN100x50,0.7045,4897,623,308,NN100x50,,,50.0
177 | 175,TOPN+NB,0.7208,5777,623,308,NB,,,100.0
178 | 176,TOPN+LR,0.7662,5777,623,308,LR,,,100.0
179 | 177,TOPN+5NN,0.6364,5777,623,308,5NN,,,100.0
180 | 178,TOPN+2NN,0.6753,5777,623,308,2NN,,,100.0
181 | 179,TOPN+1KNN,0.6883,5777,623,308,1KNN,,,100.0
182 | 180,TOPN+LSVM,0.7403,5777,623,308,LSVM,,,100.0
183 | 181,TOPN+NN100x50,0.7143,5777,623,308,NN100x50,,,100.0
184 | 182,TOPN+NB,0.7175,6692,623,308,NB,,,250.0
185 | 183,TOPN+LR,0.7695,6692,623,308,LR,,,250.0
186 | 184,TOPN+5NN,0.6396,6692,623,308,5NN,,,250.0
187 | 185,TOPN+2NN,0.6591,6692,623,308,2NN,,,250.0
188 | 186,TOPN+1KNN,0.6656,6692,623,308,1KNN,,,250.0
189 | 187,TOPN+LSVM,0.7435,6692,623,308,LSVM,,,250.0
190 | 188,TOPN+NN100x50,0.7078,6692,623,308,NN100x50,,,250.0
191 | 189,TOPN+NB,0.7208,7005,623,308,NB,,,500.0
192 | 190,TOPN+LR,0.7662,7005,623,308,LR,,,500.0
193 | 191,TOPN+5NN,0.6299,7005,623,308,5NN,,,500.0
194 | 192,TOPN+2NN,0.6591,7005,623,308,2NN,,,500.0
195 | 193,TOPN+1KNN,0.6656,7005,623,308,1KNN,,,500.0
196 | 194,TOPN+LSVM,0.7403,7005,623,308,LSVM,,,500.0
197 | 195,TOPN+NN100x50,0.7175,7005,623,308,NN100x50,,,500.0
198 |
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/lingspam/LINGSPAM_evaluation_results.csv:
--------------------------------------------------------------------------------
1 | ,Method,Accuracy,Number of features,Train size,Test size,Classifier,variance thershold,kbest,top_n
2 | 0,BOW+NB,0.9963,16695,546,270,NB,,,
3 | 1,BOW+LR,1.0,16695,546,270,LR,,,
4 | 2,BOW+5NN,0.8333,16695,546,270,5NN,,,
5 | 3,BOW+2NN,0.9074,16695,546,270,2NN,,,
6 | 4,BOW+1KNN,0.9074,16695,546,270,1KNN,,,
7 | 5,BOW+LSVM,1.0,16695,546,270,LSVM,,,
8 | 6,BOW+NN100x50,0.9963,16695,546,270,NN100x50,,,
9 | 7,META+NB,0.9963,2509,546,270,NB,,,
10 | 8,META+LR,1.0,2509,546,270,LR,,,
11 | 9,META+5NN,0.8704,2509,546,270,5NN,,,
12 | 10,META+2NN,0.9222,2509,546,270,2NN,,,
13 | 11,META+1KNN,0.9222,2509,546,270,1KNN,,,
14 | 12,META+LSVM,1.0,2509,546,270,LSVM,,,
15 | 13,META+NN100x50,1.0,2509,546,270,NN100x50,,,
16 | 14,LVAR+NB,0.9963,16695,546,270,NB,0.0005,,
17 | 15,LVAR+LR,1.0,16695,546,270,LR,0.0005,,
18 | 16,LVAR+5NN,0.8333,16695,546,270,5NN,0.0005,,
19 | 17,LVAR+2NN,0.9074,16695,546,270,2NN,0.0005,,
20 | 18,LVAR+1KNN,0.9074,16695,546,270,1KNN,0.0005,,
21 | 19,LVAR+LSVM,1.0,16695,546,270,LSVM,0.0005,,
22 | 20,LVAR+NN100x50,0.9963,16695,546,270,NN100x50,0.0005,,
23 | 21,LVAR+NB,0.9963,16695,546,270,NB,0.001,,
24 | 22,LVAR+LR,1.0,16695,546,270,LR,0.001,,
25 | 23,LVAR+5NN,0.8333,16695,546,270,5NN,0.001,,
26 | 24,LVAR+2NN,0.9074,16695,546,270,2NN,0.001,,
27 | 25,LVAR+1KNN,0.9074,16695,546,270,1KNN,0.001,,
28 | 26,LVAR+LSVM,1.0,16695,546,270,LSVM,0.001,,
29 | 27,LVAR+NN100x50,0.9963,16695,546,270,NN100x50,0.001,,
30 | 28,LVAR+NB,0.9963,16695,546,270,NB,0.0015,,
31 | 29,LVAR+LR,1.0,16695,546,270,LR,0.0015,,
32 | 30,LVAR+5NN,0.8333,16695,546,270,5NN,0.0015,,
33 | 31,LVAR+2NN,0.9074,16695,546,270,2NN,0.0015,,
34 | 32,LVAR+1KNN,0.9074,16695,546,270,1KNN,0.0015,,
35 | 33,LVAR+LSVM,1.0,16695,546,270,LSVM,0.0015,,
36 | 34,LVAR+NN100x50,0.9963,16695,546,270,NN100x50,0.0015,,
37 | 35,LVAR+NB,0.9963,11058,546,270,NB,0.002,,
38 | 36,LVAR+LR,1.0,11058,546,270,LR,0.002,,
39 | 37,LVAR+5NN,0.8296,11058,546,270,5NN,0.002,,
40 | 38,LVAR+2NN,0.9185,11058,546,270,2NN,0.002,,
41 | 39,LVAR+1KNN,0.9185,11058,546,270,1KNN,0.002,,
42 | 40,LVAR+LSVM,0.9963,11058,546,270,LSVM,0.002,,
43 | 41,LVAR+NN100x50,1.0,11058,546,270,NN100x50,0.002,,
44 | 42,LVAR+NB,0.9963,11058,546,270,NB,0.003,,
45 | 43,LVAR+LR,1.0,11058,546,270,LR,0.003,,
46 | 44,LVAR+5NN,0.8296,11058,546,270,5NN,0.003,,
47 | 45,LVAR+2NN,0.9185,11058,546,270,2NN,0.003,,
48 | 46,LVAR+1KNN,0.9185,11058,546,270,1KNN,0.003,,
49 | 47,LVAR+LSVM,0.9963,11058,546,270,LSVM,0.003,,
50 | 48,LVAR+NN100x50,1.0,11058,546,270,NN100x50,0.003,,
51 | 49,LVAR+NB,0.9963,8234,546,270,NB,0.004,,
52 | 50,LVAR+LR,1.0,8234,546,270,LR,0.004,,
53 | 51,LVAR+5NN,0.837,8234,546,270,5NN,0.004,,
54 | 52,LVAR+2NN,0.9185,8234,546,270,2NN,0.004,,
55 | 53,LVAR+1KNN,0.9185,8234,546,270,1KNN,0.004,,
56 | 54,LVAR+LSVM,0.9963,8234,546,270,LSVM,0.004,,
57 | 55,LVAR+NN100x50,0.9963,8234,546,270,NN100x50,0.004,,
58 | 56,LVAR+NB,0.9963,8234,546,270,NB,0.005,,
59 | 57,LVAR+LR,1.0,8234,546,270,LR,0.005,,
60 | 58,LVAR+5NN,0.837,8234,546,270,5NN,0.005,,
61 | 59,LVAR+2NN,0.9185,8234,546,270,2NN,0.005,,
62 | 60,LVAR+1KNN,0.9185,8234,546,270,1KNN,0.005,,
63 | 61,LVAR+LSVM,0.9963,8234,546,270,LSVM,0.005,,
64 | 62,LVAR+NN100x50,0.9963,8234,546,270,NN100x50,0.005,,
65 | 63,LVAR+NB,0.9815,5464,546,270,NB,0.01,,
66 | 64,LVAR+LR,0.9963,5464,546,270,LR,0.01,,
67 | 65,LVAR+5NN,0.8926,5464,546,270,5NN,0.01,,
68 | 66,LVAR+2NN,0.9222,5464,546,270,2NN,0.01,,
69 | 67,LVAR+1KNN,0.9222,5464,546,270,1KNN,0.01,,
70 | 68,LVAR+LSVM,0.9963,5464,546,270,LSVM,0.01,,
71 | 69,LVAR+NN100x50,0.9963,5464,546,270,NN100x50,0.01,,
72 | 70,KBEST+NB,0.9778,1000,546,270,NB,,1000.0,
73 | 71,KBEST+LR,1.0,1000,546,270,LR,,1000.0,
74 | 72,KBEST+5NN,0.9593,1000,546,270,5NN,,1000.0,
75 | 73,KBEST+2NN,0.9815,1000,546,270,2NN,,1000.0,
76 | 74,KBEST+1KNN,0.9815,1000,546,270,1KNN,,1000.0,
77 | 75,KBEST+LSVM,1.0,1000,546,270,LSVM,,1000.0,
78 | 76,KBEST+NN100x50,1.0,1000,546,270,NN100x50,,1000.0,
79 | 77,KBEST+NB,0.9778,2000,546,270,NB,,2000.0,
80 | 78,KBEST+LR,0.9963,2000,546,270,LR,,2000.0,
81 | 79,KBEST+5NN,0.9741,2000,546,270,5NN,,2000.0,
82 | 80,KBEST+2NN,0.9741,2000,546,270,2NN,,2000.0,
83 | 81,KBEST+1KNN,0.9741,2000,546,270,1KNN,,2000.0,
84 | 82,KBEST+LSVM,0.9963,2000,546,270,LSVM,,2000.0,
85 | 83,KBEST+NN100x50,0.9963,2000,546,270,NN100x50,,2000.0,
86 | 84,KBEST+NB,0.9963,3000,546,270,NB,,3000.0,
87 | 85,KBEST+LR,0.9963,3000,546,270,LR,,3000.0,
88 | 86,KBEST+5NN,0.9667,3000,546,270,5NN,,3000.0,
89 | 87,KBEST+2NN,0.9815,3000,546,270,2NN,,3000.0,
90 | 88,KBEST+1KNN,0.9889,3000,546,270,1KNN,,3000.0,
91 | 89,KBEST+LSVM,0.9963,3000,546,270,LSVM,,3000.0,
92 | 90,KBEST+NN100x50,0.9926,3000,546,270,NN100x50,,3000.0,
93 | 91,KBEST+NB,0.9963,4000,546,270,NB,,4000.0,
94 | 92,KBEST+LR,0.9963,4000,546,270,LR,,4000.0,
95 | 93,KBEST+5NN,0.963,4000,546,270,5NN,,4000.0,
96 | 94,KBEST+2NN,0.9852,4000,546,270,2NN,,4000.0,
97 | 95,KBEST+1KNN,0.9852,4000,546,270,1KNN,,4000.0,
98 | 96,KBEST+LSVM,0.9963,4000,546,270,LSVM,,4000.0,
99 | 97,KBEST+NN100x50,0.9926,4000,546,270,NN100x50,,4000.0,
100 | 98,KBEST+NB,0.9963,5000,546,270,NB,,5000.0,
101 | 99,KBEST+LR,0.9963,5000,546,270,LR,,5000.0,
102 | 100,KBEST+5NN,0.9778,5000,546,270,5NN,,5000.0,
103 | 101,KBEST+2NN,0.9741,5000,546,270,2NN,,5000.0,
104 | 102,KBEST+1KNN,0.9741,5000,546,270,1KNN,,5000.0,
105 | 103,KBEST+LSVM,0.9963,5000,546,270,LSVM,,5000.0,
106 | 104,KBEST+NN100x50,0.9926,5000,546,270,NN100x50,,5000.0,
107 | 105,KBEST+NB,0.9963,6000,546,270,NB,,6000.0,
108 | 106,KBEST+LR,0.9963,6000,546,270,LR,,6000.0,
109 | 107,KBEST+5NN,0.9667,6000,546,270,5NN,,6000.0,
110 | 108,KBEST+2NN,0.9667,6000,546,270,2NN,,6000.0,
111 | 109,KBEST+1KNN,0.9667,6000,546,270,1KNN,,6000.0,
112 | 110,KBEST+LSVM,0.9963,6000,546,270,LSVM,,6000.0,
113 | 111,KBEST+NN100x50,0.9926,6000,546,270,NN100x50,,6000.0,
114 | 112,KBEST+NB,0.9963,7000,546,270,NB,,7000.0,
115 | 113,KBEST+LR,0.9963,7000,546,270,LR,,7000.0,
116 | 114,KBEST+5NN,0.9481,7000,546,270,5NN,,7000.0,
117 | 115,KBEST+2NN,0.9556,7000,546,270,2NN,,7000.0,
118 | 116,KBEST+1KNN,0.9556,7000,546,270,1KNN,,7000.0,
119 | 117,KBEST+LSVM,0.9963,7000,546,270,LSVM,,7000.0,
120 | 118,KBEST+NN100x50,0.9926,7000,546,270,NN100x50,,7000.0,
121 | 119,KBEST+NB,0.9963,10000,546,270,NB,,10000.0,
122 | 120,KBEST+LR,1.0,10000,546,270,LR,,10000.0,
123 | 121,KBEST+5NN,0.8889,10000,546,270,5NN,,10000.0,
124 | 122,KBEST+2NN,0.9222,10000,546,270,2NN,,10000.0,
125 | 123,KBEST+1KNN,0.9222,10000,546,270,1KNN,,10000.0,
126 | 124,KBEST+LSVM,0.9963,10000,546,270,LSVM,,10000.0,
127 | 125,KBEST+NN100x50,0.9963,10000,546,270,NN100x50,,10000.0,
128 | 126,KBEST+NB,0.9963,14000,546,270,NB,,14000.0,
129 | 127,KBEST+LR,1.0,14000,546,270,LR,,14000.0,
130 | 128,KBEST+5NN,0.837,14000,546,270,5NN,,14000.0,
131 | 129,KBEST+2NN,0.9222,14000,546,270,2NN,,14000.0,
132 | 130,KBEST+1KNN,0.9222,14000,546,270,1KNN,,14000.0,
133 | 131,KBEST+LSVM,0.9963,14000,546,270,LSVM,,14000.0,
134 | 132,KBEST+NN100x50,0.9963,14000,546,270,NN100x50,,14000.0,
135 | 133,TOPN+NB,0.9815,120,546,270,NB,,,5.0
136 | 134,TOPN+LR,1.0,120,546,270,LR,,,5.0
137 | 135,TOPN+5NN,0.9926,120,546,270,5NN,,,5.0
138 | 136,TOPN+2NN,0.9852,120,546,270,2NN,,,5.0
139 | 137,TOPN+1KNN,0.9852,120,546,270,1KNN,,,5.0
140 | 138,TOPN+LSVM,0.9963,120,546,270,LSVM,,,5.0
141 | 139,TOPN+NN100x50,0.9963,120,546,270,NN100x50,,,5.0
142 | 140,TOPN+NB,0.9778,296,546,270,NB,,,10.0
143 | 141,TOPN+LR,1.0,296,546,270,LR,,,10.0
144 | 142,TOPN+5NN,0.9704,296,546,270,5NN,,,10.0
145 | 143,TOPN+2NN,0.9556,296,546,270,2NN,,,10.0
146 | 144,TOPN+1KNN,0.9593,296,546,270,1KNN,,,10.0
147 | 145,TOPN+LSVM,0.9963,296,546,270,LSVM,,,10.0
148 | 146,TOPN+NN100x50,0.9926,296,546,270,NN100x50,,,10.0
149 | 147,TOPN+NB,0.9815,526,546,270,NB,,,15.0
150 | 148,TOPN+LR,1.0,526,546,270,LR,,,15.0
151 | 149,TOPN+5NN,0.9481,526,546,270,5NN,,,15.0
152 | 150,TOPN+2NN,0.9667,526,546,270,2NN,,,15.0
153 | 151,TOPN+1KNN,0.9667,526,546,270,1KNN,,,15.0
154 | 152,TOPN+LSVM,0.9963,526,546,270,LSVM,,,15.0
155 | 153,TOPN+NN100x50,0.9926,526,546,270,NN100x50,,,15.0
156 | 154,TOPN+NB,0.9778,758,546,270,NB,,,20.0
157 | 155,TOPN+LR,1.0,758,546,270,LR,,,20.0
158 | 156,TOPN+5NN,0.9407,758,546,270,5NN,,,20.0
159 | 157,TOPN+2NN,0.9519,758,546,270,2NN,,,20.0
160 | 158,TOPN+1KNN,0.9519,758,546,270,1KNN,,,20.0
161 | 159,TOPN+LSVM,0.9963,758,546,270,LSVM,,,20.0
162 | 160,TOPN+NN100x50,1.0,758,546,270,NN100x50,,,20.0
163 | 161,TOPN+NB,0.9778,995,546,270,NB,,,25.0
164 | 162,TOPN+LR,1.0,995,546,270,LR,,,25.0
165 | 163,TOPN+5NN,0.9259,995,546,270,5NN,,,25.0
166 | 164,TOPN+2NN,0.9519,995,546,270,2NN,,,25.0
167 | 165,TOPN+1KNN,0.9519,995,546,270,1KNN,,,25.0
168 | 166,TOPN+LSVM,0.9963,995,546,270,LSVM,,,25.0
169 | 167,TOPN+NN100x50,1.0,995,546,270,NN100x50,,,25.0
170 | 168,TOPN+NB,0.9963,2274,546,270,NB,,,50.0
171 | 169,TOPN+LR,1.0,2274,546,270,LR,,,50.0
172 | 170,TOPN+5NN,0.8926,2274,546,270,5NN,,,50.0
173 | 171,TOPN+2NN,0.9333,2274,546,270,2NN,,,50.0
174 | 172,TOPN+1KNN,0.9333,2274,546,270,1KNN,,,50.0
175 | 173,TOPN+LSVM,0.9963,2274,546,270,LSVM,,,50.0
176 | 174,TOPN+NN100x50,1.0,2274,546,270,NN100x50,,,50.0
177 | 175,TOPN+NB,0.9963,4443,546,270,NB,,,100.0
178 | 176,TOPN+LR,1.0,4443,546,270,LR,,,100.0
179 | 177,TOPN+5NN,0.8778,4443,546,270,5NN,,,100.0
180 | 178,TOPN+2NN,0.9333,4443,546,270,2NN,,,100.0
181 | 179,TOPN+1KNN,0.9333,4443,546,270,1KNN,,,100.0
182 | 180,TOPN+LSVM,0.9963,4443,546,270,LSVM,,,100.0
183 | 181,TOPN+NN100x50,0.9963,4443,546,270,NN100x50,,,100.0
184 | 182,TOPN+NB,0.9963,8975,546,270,NB,,,250.0
185 | 183,TOPN+LR,1.0,8975,546,270,LR,,,250.0
186 | 184,TOPN+5NN,0.8481,8975,546,270,5NN,,,250.0
187 | 185,TOPN+2NN,0.9259,8975,546,270,2NN,,,250.0
188 | 186,TOPN+1KNN,0.9259,8975,546,270,1KNN,,,250.0
189 | 187,TOPN+LSVM,0.9963,8975,546,270,LSVM,,,250.0
190 | 188,TOPN+NN100x50,1.0,8975,546,270,NN100x50,,,250.0
191 | 189,TOPN+NB,0.9963,12104,546,270,NB,,,500.0
192 | 190,TOPN+LR,1.0,12104,546,270,LR,,,500.0
193 | 191,TOPN+5NN,0.8407,12104,546,270,5NN,,,500.0
194 | 192,TOPN+2NN,0.9259,12104,546,270,2NN,,,500.0
195 | 193,TOPN+1KNN,0.9259,12104,546,270,1KNN,,,500.0
196 | 194,TOPN+LSVM,0.9963,12104,546,270,LSVM,,,500.0
197 | 195,TOPN+NN100x50,0.9963,12104,546,270,NN100x50,,,500.0
198 |
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/reuters/REUTERS_evaluation_results.csv:
--------------------------------------------------------------------------------
1 | ,Method,Accuracy,Number of features,Train size,Test size,Classifier,variance thershold,kbest,top_n
2 | 0,BOW+NB,0.8191,15514,4501,2217,NB,,,
3 | 1,BOW+LR,0.8746,15514,4501,2217,LR,,,
4 | 2,BOW+5NN,0.7582,15514,4501,2217,5NN,,,
5 | 3,BOW+2NN,0.802,15514,4501,2217,2NN,,,
6 | 4,BOW+1KNN,0.7997,15514,4501,2217,1KNN,,,
7 | 5,BOW+LSVM,0.8742,15514,4501,2217,LSVM,,,
8 | 6,BOW+NN100x50,0.8656,15514,4501,2217,NN100x50,,,
9 | 7,META+NB,0.8376,2494,4501,2217,NB,,,
10 | 8,META+LR,0.876,2494,4501,2217,LR,,,
11 | 9,META+5NN,0.8002,2494,4501,2217,5NN,,,
12 | 10,META+2NN,0.8223,2494,4501,2217,2NN,,,
13 | 11,META+1KNN,0.8245,2494,4501,2217,1KNN,,,
14 | 12,META+LSVM,0.8746,2494,4501,2217,LSVM,,,
15 | 13,META+NN100x50,0.8701,2494,4501,2217,NN100x50,,,
16 | 14,LVAR+NB,0.8372,7624,4501,2217,NB,0.0005,,
17 | 15,LVAR+LR,0.8755,7624,4501,2217,LR,0.0005,,
18 | 16,LVAR+5NN,0.7677,7624,4501,2217,5NN,0.0005,,
19 | 17,LVAR+2NN,0.8097,7624,4501,2217,2NN,0.0005,,
20 | 18,LVAR+1KNN,0.8088,7624,4501,2217,1KNN,0.0005,,
21 | 19,LVAR+LSVM,0.8746,7624,4501,2217,LSVM,0.0005,,
22 | 20,LVAR+NN100x50,0.8705,7624,4501,2217,NN100x50,0.0005,,
23 | 21,LVAR+NB,0.8363,5780,4501,2217,NB,0.001,,
24 | 22,LVAR+LR,0.8733,5780,4501,2217,LR,0.001,,
25 | 23,LVAR+5NN,0.7826,5780,4501,2217,5NN,0.001,,
26 | 24,LVAR+2NN,0.816,5780,4501,2217,2NN,0.001,,
27 | 25,LVAR+1KNN,0.8137,5780,4501,2217,1KNN,0.001,,
28 | 26,LVAR+LSVM,0.8733,5780,4501,2217,LSVM,0.001,,
29 | 27,LVAR+NN100x50,0.8714,5780,4501,2217,NN100x50,0.001,,
30 | 28,LVAR+NB,0.8367,4870,4501,2217,NB,0.0015,,
31 | 29,LVAR+LR,0.8742,4870,4501,2217,LR,0.0015,,
32 | 30,LVAR+5NN,0.7966,4870,4501,2217,5NN,0.0015,,
33 | 31,LVAR+2NN,0.8205,4870,4501,2217,2NN,0.0015,,
34 | 32,LVAR+1KNN,0.8173,4870,4501,2217,1KNN,0.0015,,
35 | 33,LVAR+LSVM,0.8737,4870,4501,2217,LSVM,0.0015,,
36 | 34,LVAR+NN100x50,0.8724,4870,4501,2217,NN100x50,0.0015,,
37 | 35,LVAR+NB,0.8399,4014,4501,2217,NB,0.002,,
38 | 36,LVAR+LR,0.8737,4014,4501,2217,LR,0.002,,
39 | 37,LVAR+5NN,0.8083,4014,4501,2217,5NN,0.002,,
40 | 38,LVAR+2NN,0.8236,4014,4501,2217,2NN,0.002,,
41 | 39,LVAR+1KNN,0.8232,4014,4501,2217,1KNN,0.002,,
42 | 40,LVAR+LSVM,0.8705,4014,4501,2217,LSVM,0.002,,
43 | 41,LVAR+NN100x50,0.8665,4014,4501,2217,NN100x50,0.002,,
44 | 42,LVAR+NB,0.8403,3356,4501,2217,NB,0.003,,
45 | 43,LVAR+LR,0.8755,3356,4501,2217,LR,0.003,,
46 | 44,LVAR+5NN,0.8101,3356,4501,2217,5NN,0.003,,
47 | 45,LVAR+2NN,0.8236,3356,4501,2217,2NN,0.003,,
48 | 46,LVAR+1KNN,0.8272,3356,4501,2217,1KNN,0.003,,
49 | 47,LVAR+LSVM,0.8714,3356,4501,2217,LSVM,0.003,,
50 | 48,LVAR+NN100x50,0.8687,3356,4501,2217,NN100x50,0.003,,
51 | 49,LVAR+NB,0.8381,2772,4501,2217,NB,0.004,,
52 | 50,LVAR+LR,0.8755,2772,4501,2217,LR,0.004,,
53 | 51,LVAR+5NN,0.8106,2772,4501,2217,5NN,0.004,,
54 | 52,LVAR+2NN,0.8245,2772,4501,2217,2NN,0.004,,
55 | 53,LVAR+1KNN,0.8259,2772,4501,2217,1KNN,0.004,,
56 | 54,LVAR+LSVM,0.8692,2772,4501,2217,LSVM,0.004,,
57 | 55,LVAR+NN100x50,0.8724,2772,4501,2217,NN100x50,0.004,,
58 | 56,LVAR+NB,0.839,2458,4501,2217,NB,0.005,,
59 | 57,LVAR+LR,0.8751,2458,4501,2217,LR,0.005,,
60 | 58,LVAR+5NN,0.811,2458,4501,2217,5NN,0.005,,
61 | 59,LVAR+2NN,0.8254,2458,4501,2217,2NN,0.005,,
62 | 60,LVAR+1KNN,0.8268,2458,4501,2217,1KNN,0.005,,
63 | 61,LVAR+LSVM,0.8692,2458,4501,2217,LSVM,0.005,,
64 | 62,LVAR+NN100x50,0.8724,2458,4501,2217,NN100x50,0.005,,
65 | 63,LVAR+NB,0.8349,1482,4501,2217,NB,0.01,,
66 | 64,LVAR+LR,0.8742,1482,4501,2217,LR,0.01,,
67 | 65,LVAR+5NN,0.8209,1482,4501,2217,5NN,0.01,,
68 | 66,LVAR+2NN,0.8295,1482,4501,2217,2NN,0.01,,
69 | 67,LVAR+1KNN,0.8295,1482,4501,2217,1KNN,0.01,,
70 | 68,LVAR+LSVM,0.8669,1482,4501,2217,LSVM,0.01,,
71 | 69,LVAR+NN100x50,0.8674,1482,4501,2217,NN100x50,0.01,,
72 | 70,KBEST+NB,0.8038,1000,4501,2217,NB,,1000.0,
73 | 71,KBEST+LR,0.8358,1000,4501,2217,LR,,1000.0,
74 | 72,KBEST+5NN,0.7939,1000,4501,2217,5NN,,1000.0,
75 | 73,KBEST+2NN,0.8033,1000,4501,2217,2NN,,1000.0,
76 | 74,KBEST+1KNN,0.8029,1000,4501,2217,1KNN,,1000.0,
77 | 75,KBEST+LSVM,0.8295,1000,4501,2217,LSVM,,1000.0,
78 | 76,KBEST+NN100x50,0.83,1000,4501,2217,NN100x50,,1000.0,
79 | 77,KBEST+NB,0.8169,2000,4501,2217,NB,,2000.0,
80 | 78,KBEST+LR,0.8484,2000,4501,2217,LR,,2000.0,
81 | 79,KBEST+5NN,0.7866,2000,4501,2217,5NN,,2000.0,
82 | 80,KBEST+2NN,0.8097,2000,4501,2217,2NN,,2000.0,
83 | 81,KBEST+1KNN,0.8088,2000,4501,2217,1KNN,,2000.0,
84 | 82,KBEST+LSVM,0.8309,2000,4501,2217,LSVM,,2000.0,
85 | 83,KBEST+NN100x50,0.8331,2000,4501,2217,NN100x50,,2000.0,
86 | 84,KBEST+NB,0.8187,3000,4501,2217,NB,,3000.0,
87 | 85,KBEST+LR,0.8633,3000,4501,2217,LR,,3000.0,
88 | 86,KBEST+5NN,0.793,3000,4501,2217,5NN,,3000.0,
89 | 87,KBEST+2NN,0.8119,3000,4501,2217,2NN,,3000.0,
90 | 88,KBEST+1KNN,0.8124,3000,4501,2217,1KNN,,3000.0,
91 | 89,KBEST+LSVM,0.8507,3000,4501,2217,LSVM,,3000.0,
92 | 90,KBEST+NN100x50,0.8579,3000,4501,2217,NN100x50,,3000.0,
93 | 91,KBEST+NB,0.8272,4000,4501,2217,NB,,4000.0,
94 | 92,KBEST+LR,0.8687,4000,4501,2217,LR,,4000.0,
95 | 93,KBEST+5NN,0.788,4000,4501,2217,5NN,,4000.0,
96 | 94,KBEST+2NN,0.8182,4000,4501,2217,2NN,,4000.0,
97 | 95,KBEST+1KNN,0.8187,4000,4501,2217,1KNN,,4000.0,
98 | 96,KBEST+LSVM,0.8602,4000,4501,2217,LSVM,,4000.0,
99 | 97,KBEST+NN100x50,0.8552,4000,4501,2217,NN100x50,,4000.0,
100 | 98,KBEST+NB,0.8259,5000,4501,2217,NB,,5000.0,
101 | 99,KBEST+LR,0.8687,5000,4501,2217,LR,,5000.0,
102 | 100,KBEST+5NN,0.7889,5000,4501,2217,5NN,,5000.0,
103 | 101,KBEST+2NN,0.8128,5000,4501,2217,2NN,,5000.0,
104 | 102,KBEST+1KNN,0.8142,5000,4501,2217,1KNN,,5000.0,
105 | 103,KBEST+LSVM,0.8638,5000,4501,2217,LSVM,,5000.0,
106 | 104,KBEST+NN100x50,0.8633,5000,4501,2217,NN100x50,,5000.0,
107 | 105,KBEST+NB,0.8277,6000,4501,2217,NB,,6000.0,
108 | 106,KBEST+LR,0.8737,6000,4501,2217,LR,,6000.0,
109 | 107,KBEST+5NN,0.7957,6000,4501,2217,5NN,,6000.0,
110 | 108,KBEST+2NN,0.8191,6000,4501,2217,2NN,,6000.0,
111 | 109,KBEST+1KNN,0.82,6000,4501,2217,1KNN,,6000.0,
112 | 110,KBEST+LSVM,0.8714,6000,4501,2217,LSVM,,6000.0,
113 | 111,KBEST+NN100x50,0.8665,6000,4501,2217,NN100x50,,6000.0,
114 | 112,KBEST+NB,0.8354,7000,4501,2217,NB,,7000.0,
115 | 113,KBEST+LR,0.8751,7000,4501,2217,LR,,7000.0,
116 | 114,KBEST+5NN,0.7952,7000,4501,2217,5NN,,7000.0,
117 | 115,KBEST+2NN,0.8205,7000,4501,2217,2NN,,7000.0,
118 | 116,KBEST+1KNN,0.8205,7000,4501,2217,1KNN,,7000.0,
119 | 117,KBEST+LSVM,0.8737,7000,4501,2217,LSVM,,7000.0,
120 | 118,KBEST+NN100x50,0.8696,7000,4501,2217,NN100x50,,7000.0,
121 | 119,KBEST+NB,0.8336,10000,4501,2217,NB,,10000.0,
122 | 120,KBEST+LR,0.8764,10000,4501,2217,LR,,10000.0,
123 | 121,KBEST+5NN,0.7939,10000,4501,2217,5NN,,10000.0,
124 | 122,KBEST+2NN,0.816,10000,4501,2217,2NN,,10000.0,
125 | 123,KBEST+1KNN,0.8191,10000,4501,2217,1KNN,,10000.0,
126 | 124,KBEST+LSVM,0.8719,10000,4501,2217,LSVM,,10000.0,
127 | 125,KBEST+NN100x50,0.8624,10000,4501,2217,NN100x50,,10000.0,
128 | 126,KBEST+NB,0.8236,14000,4501,2217,NB,,14000.0,
129 | 127,KBEST+LR,0.8728,14000,4501,2217,LR,,14000.0,
130 | 128,KBEST+5NN,0.7497,14000,4501,2217,5NN,,14000.0,
131 | 129,KBEST+2NN,0.7961,14000,4501,2217,2NN,,14000.0,
132 | 130,KBEST+1KNN,0.7948,14000,4501,2217,1KNN,,14000.0,
133 | 131,KBEST+LSVM,0.8737,14000,4501,2217,LSVM,,14000.0,
134 | 132,KBEST+NN100x50,0.8705,14000,4501,2217,NN100x50,,14000.0,
135 | 133,TOPN+NB,0.8033,293,4501,2217,NB,,,5.0
136 | 134,TOPN+LR,0.8399,293,4501,2217,LR,,,5.0
137 | 135,TOPN+5NN,0.8164,293,4501,2217,5NN,,,5.0
138 | 136,TOPN+2NN,0.8083,293,4501,2217,2NN,,,5.0
139 | 137,TOPN+1KNN,0.8078,293,4501,2217,1KNN,,,5.0
140 | 138,TOPN+LSVM,0.8214,293,4501,2217,LSVM,,,5.0
141 | 139,TOPN+NN100x50,0.8336,293,4501,2217,NN100x50,,,5.0
142 | 140,TOPN+NB,0.8295,809,4501,2217,NB,,,10.0
143 | 141,TOPN+LR,0.8642,809,4501,2217,LR,,,10.0
144 | 142,TOPN+5NN,0.8272,809,4501,2217,5NN,,,10.0
145 | 143,TOPN+2NN,0.8259,809,4501,2217,2NN,,,10.0
146 | 144,TOPN+1KNN,0.8241,809,4501,2217,1KNN,,,10.0
147 | 145,TOPN+LSVM,0.8466,809,4501,2217,LSVM,,,10.0
148 | 146,TOPN+NN100x50,0.8548,809,4501,2217,NN100x50,,,10.0
149 | 147,TOPN+NB,0.8331,1615,4501,2217,NB,,,15.0
150 | 148,TOPN+LR,0.8724,1615,4501,2217,LR,,,15.0
151 | 149,TOPN+5NN,0.8232,1615,4501,2217,5NN,,,15.0
152 | 150,TOPN+2NN,0.8313,1615,4501,2217,2NN,,,15.0
153 | 151,TOPN+1KNN,0.8309,1615,4501,2217,1KNN,,,15.0
154 | 152,TOPN+LSVM,0.8606,1615,4501,2217,LSVM,,,15.0
155 | 153,TOPN+NN100x50,0.8674,1615,4501,2217,NN100x50,,,15.0
156 | 154,TOPN+NB,0.8372,2430,4501,2217,NB,,,20.0
157 | 155,TOPN+LR,0.8728,2430,4501,2217,LR,,,20.0
158 | 156,TOPN+5NN,0.8155,2430,4501,2217,5NN,,,20.0
159 | 157,TOPN+2NN,0.8295,2430,4501,2217,2NN,,,20.0
160 | 158,TOPN+1KNN,0.8309,2430,4501,2217,1KNN,,,20.0
161 | 159,TOPN+LSVM,0.8665,2430,4501,2217,LSVM,,,20.0
162 | 160,TOPN+NN100x50,0.8701,2430,4501,2217,NN100x50,,,20.0
163 | 161,TOPN+NB,0.839,3182,4501,2217,NB,,,25.0
164 | 162,TOPN+LR,0.8742,3182,4501,2217,LR,,,25.0
165 | 163,TOPN+5NN,0.811,3182,4501,2217,5NN,,,25.0
166 | 164,TOPN+2NN,0.8272,3182,4501,2217,2NN,,,25.0
167 | 165,TOPN+1KNN,0.8268,3182,4501,2217,1KNN,,,25.0
168 | 166,TOPN+LSVM,0.8678,3182,4501,2217,LSVM,,,25.0
169 | 167,TOPN+NN100x50,0.8687,3182,4501,2217,NN100x50,,,25.0
170 | 168,TOPN+NB,0.8372,5359,4501,2217,NB,,,50.0
171 | 169,TOPN+LR,0.8755,5359,4501,2217,LR,,,50.0
172 | 170,TOPN+5NN,0.802,5359,4501,2217,5NN,,,50.0
173 | 171,TOPN+2NN,0.8263,5359,4501,2217,2NN,,,50.0
174 | 172,TOPN+1KNN,0.8268,5359,4501,2217,1KNN,,,50.0
175 | 173,TOPN+LSVM,0.8724,5359,4501,2217,LSVM,,,50.0
176 | 174,TOPN+NN100x50,0.8714,5359,4501,2217,NN100x50,,,50.0
177 | 175,TOPN+NB,0.8399,7171,4501,2217,NB,,,100.0
178 | 176,TOPN+LR,0.8782,7171,4501,2217,LR,,,100.0
179 | 177,TOPN+5NN,0.7993,7171,4501,2217,5NN,,,100.0
180 | 178,TOPN+2NN,0.8232,7171,4501,2217,2NN,,,100.0
181 | 179,TOPN+1KNN,0.8227,7171,4501,2217,1KNN,,,100.0
182 | 180,TOPN+LSVM,0.8737,7171,4501,2217,LSVM,,,100.0
183 | 181,TOPN+NN100x50,0.8674,7171,4501,2217,NN100x50,,,100.0
184 | 182,TOPN+NB,0.8381,8187,4501,2217,NB,,,250.0
185 | 183,TOPN+LR,0.876,8187,4501,2217,LR,,,250.0
186 | 184,TOPN+5NN,0.7979,8187,4501,2217,5NN,,,250.0
187 | 185,TOPN+2NN,0.8232,8187,4501,2217,2NN,,,250.0
188 | 186,TOPN+1KNN,0.8232,8187,4501,2217,1KNN,,,250.0
189 | 187,TOPN+LSVM,0.8724,8187,4501,2217,LSVM,,,250.0
190 | 188,TOPN+NN100x50,0.8733,8187,4501,2217,NN100x50,,,250.0
191 | 189,TOPN+NB,0.8376,8246,4501,2217,NB,,,500.0
192 | 190,TOPN+LR,0.8778,8246,4501,2217,LR,,,500.0
193 | 191,TOPN+5NN,0.7975,8246,4501,2217,5NN,,,500.0
194 | 192,TOPN+2NN,0.8232,8246,4501,2217,2NN,,,500.0
195 | 193,TOPN+1KNN,0.8232,8246,4501,2217,1KNN,,,500.0
196 | 194,TOPN+LSVM,0.8733,8246,4501,2217,LSVM,,,500.0
197 | 195,TOPN+NN100x50,0.8683,8246,4501,2217,NN100x50,,,500.0
198 |
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/jira_issues/JIRAISSUES_evaluation_results.csv:
--------------------------------------------------------------------------------
1 | ,Method,Accuracy,Number of features,Train size,Test size,Classifier,variance thershold,kbest,top_n
2 | 0,BOW+NB,0.6989,14539,3229,1591,NB,,,
3 | 1,BOW+LR,0.7461,14539,3229,1591,LR,,,
4 | 2,BOW+5NN,0.6486,14539,3229,1591,5NN,,,
5 | 3,BOW+2NN,0.6644,14539,3229,1591,2NN,,,
6 | 4,BOW+1KNN,0.6637,14539,3229,1591,1KNN,,,
7 | 5,BOW+LSVM,0.7304,14539,3229,1591,LSVM,,,
8 | 6,BOW+NN100x50,0.741,14539,3229,1591,NN100x50,,,
9 | 7,META+NB,0.6989,2942,3229,1591,NB,,,
10 | 8,META+LR,0.7461,2942,3229,1591,LR,,,
11 | 9,META+5NN,0.6329,2942,3229,1591,5NN,,,
12 | 10,META+2NN,0.6662,2942,3229,1591,2NN,,,
13 | 11,META+1KNN,0.6688,2942,3229,1591,1KNN,,,
14 | 12,META+LSVM,0.731,2942,3229,1591,LSVM,,,
15 | 13,META+NN100x50,0.7467,2942,3229,1591,NN100x50,,,
16 | 14,LVAR+NB,0.6933,9706,3229,1591,NB,0.0005,,
17 | 15,LVAR+LR,0.7442,9706,3229,1591,LR,0.0005,,
18 | 16,LVAR+5NN,0.6411,9706,3229,1591,5NN,0.0005,,
19 | 17,LVAR+2NN,0.6593,9706,3229,1591,2NN,0.0005,,
20 | 18,LVAR+1KNN,0.6512,9706,3229,1591,1KNN,0.0005,,
21 | 19,LVAR+LSVM,0.7291,9706,3229,1591,LSVM,0.0005,,
22 | 20,LVAR+NN100x50,0.7379,9706,3229,1591,NN100x50,0.0005,,
23 | 21,LVAR+NB,0.6895,8221,3229,1591,NB,0.001,,
24 | 22,LVAR+LR,0.7404,8221,3229,1591,LR,0.001,,
25 | 23,LVAR+5NN,0.6386,8221,3229,1591,5NN,0.001,,
26 | 24,LVAR+2NN,0.6644,8221,3229,1591,2NN,0.001,,
27 | 25,LVAR+1KNN,0.6562,8221,3229,1591,1KNN,0.001,,
28 | 26,LVAR+LSVM,0.7197,8221,3229,1591,LSVM,0.001,,
29 | 27,LVAR+NN100x50,0.7348,8221,3229,1591,NN100x50,0.001,,
30 | 28,LVAR+NB,0.6882,6489,3229,1591,NB,0.0015,,
31 | 29,LVAR+LR,0.736,6489,3229,1591,LR,0.0015,,
32 | 30,LVAR+5NN,0.6279,6489,3229,1591,5NN,0.0015,,
33 | 31,LVAR+2NN,0.6574,6489,3229,1591,2NN,0.0015,,
34 | 32,LVAR+1KNN,0.6543,6489,3229,1591,1KNN,0.0015,,
35 | 33,LVAR+LSVM,0.7178,6489,3229,1591,LSVM,0.0015,,
36 | 34,LVAR+NN100x50,0.7398,6489,3229,1591,NN100x50,0.0015,,
37 | 35,LVAR+NB,0.6889,5833,3229,1591,NB,0.002,,
38 | 36,LVAR+LR,0.7373,5833,3229,1591,LR,0.002,,
39 | 37,LVAR+5NN,0.6273,5833,3229,1591,5NN,0.002,,
40 | 38,LVAR+2NN,0.6618,5833,3229,1591,2NN,0.002,,
41 | 39,LVAR+1KNN,0.6574,5833,3229,1591,1KNN,0.002,,
42 | 40,LVAR+LSVM,0.719,5833,3229,1591,LSVM,0.002,,
43 | 41,LVAR+NN100x50,0.7304,5833,3229,1591,NN100x50,0.002,,
44 | 42,LVAR+NB,0.6857,4821,3229,1591,NB,0.003,,
45 | 43,LVAR+LR,0.7316,4821,3229,1591,LR,0.003,,
46 | 44,LVAR+5NN,0.6455,4821,3229,1591,5NN,0.003,,
47 | 45,LVAR+2NN,0.6637,4821,3229,1591,2NN,0.003,,
48 | 46,LVAR+1KNN,0.6669,4821,3229,1591,1KNN,0.003,,
49 | 47,LVAR+LSVM,0.7128,4821,3229,1591,LSVM,0.003,,
50 | 48,LVAR+NN100x50,0.7385,4821,3229,1591,NN100x50,0.003,,
51 | 49,LVAR+NB,0.6801,4450,3229,1591,NB,0.004,,
52 | 50,LVAR+LR,0.7322,4450,3229,1591,LR,0.004,,
53 | 51,LVAR+5NN,0.6354,4450,3229,1591,5NN,0.004,,
54 | 52,LVAR+2NN,0.6644,4450,3229,1591,2NN,0.004,,
55 | 53,LVAR+1KNN,0.6656,4450,3229,1591,1KNN,0.004,,
56 | 54,LVAR+LSVM,0.7165,4450,3229,1591,LSVM,0.004,,
57 | 55,LVAR+NN100x50,0.7304,4450,3229,1591,NN100x50,0.004,,
58 | 56,LVAR+NB,0.6719,3766,3229,1591,NB,0.005,,
59 | 57,LVAR+LR,0.7316,3766,3229,1591,LR,0.005,,
60 | 58,LVAR+5NN,0.6354,3766,3229,1591,5NN,0.005,,
61 | 59,LVAR+2NN,0.6499,3766,3229,1591,2NN,0.005,,
62 | 60,LVAR+1KNN,0.6537,3766,3229,1591,1KNN,0.005,,
63 | 61,LVAR+LSVM,0.719,3766,3229,1591,LSVM,0.005,,
64 | 62,LVAR+NN100x50,0.7291,3766,3229,1591,NN100x50,0.005,,
65 | 63,LVAR+NB,0.6713,2771,3229,1591,NB,0.01,,
66 | 64,LVAR+LR,0.7297,2771,3229,1591,LR,0.01,,
67 | 65,LVAR+5NN,0.6493,2771,3229,1591,5NN,0.01,,
68 | 66,LVAR+2NN,0.6449,2771,3229,1591,2NN,0.01,,
69 | 67,LVAR+1KNN,0.6474,2771,3229,1591,1KNN,0.01,,
70 | 68,LVAR+LSVM,0.7134,2771,3229,1591,LSVM,0.01,,
71 | 69,LVAR+NN100x50,0.7285,2771,3229,1591,NN100x50,0.01,,
72 | 70,KBEST+NB,0.6235,1000,3229,1591,NB,,1000.0,
73 | 71,KBEST+LR,0.6776,1000,3229,1591,LR,,1000.0,
74 | 72,KBEST+5NN,0.616,1000,3229,1591,5NN,,1000.0,
75 | 73,KBEST+2NN,0.638,1000,3229,1591,2NN,,1000.0,
76 | 74,KBEST+1KNN,0.6273,1000,3229,1591,1KNN,,1000.0,
77 | 75,KBEST+LSVM,0.675,1000,3229,1591,LSVM,,1000.0,
78 | 76,KBEST+NN100x50,0.6725,1000,3229,1591,NN100x50,,1000.0,
79 | 77,KBEST+NB,0.6719,2000,3229,1591,NB,,2000.0,
80 | 78,KBEST+LR,0.709,2000,3229,1591,LR,,2000.0,
81 | 79,KBEST+5NN,0.6449,2000,3229,1591,5NN,,2000.0,
82 | 80,KBEST+2NN,0.6493,2000,3229,1591,2NN,,2000.0,
83 | 81,KBEST+1KNN,0.6474,2000,3229,1591,1KNN,,2000.0,
84 | 82,KBEST+LSVM,0.7002,2000,3229,1591,LSVM,,2000.0,
85 | 83,KBEST+NN100x50,0.7046,2000,3229,1591,NN100x50,,2000.0,
86 | 84,KBEST+NB,0.6625,3000,3229,1591,NB,,3000.0,
87 | 85,KBEST+LR,0.7184,3000,3229,1591,LR,,3000.0,
88 | 86,KBEST+5NN,0.6449,3000,3229,1591,5NN,,3000.0,
89 | 87,KBEST+2NN,0.648,3000,3229,1591,2NN,,3000.0,
90 | 88,KBEST+1KNN,0.6499,3000,3229,1591,1KNN,,3000.0,
91 | 89,KBEST+LSVM,0.7046,3000,3229,1591,LSVM,,3000.0,
92 | 90,KBEST+NN100x50,0.7165,3000,3229,1591,NN100x50,,3000.0,
93 | 91,KBEST+NB,0.6763,4000,3229,1591,NB,,4000.0,
94 | 92,KBEST+LR,0.7291,4000,3229,1591,LR,,4000.0,
95 | 93,KBEST+5NN,0.6644,4000,3229,1591,5NN,,4000.0,
96 | 94,KBEST+2NN,0.6562,4000,3229,1591,2NN,,4000.0,
97 | 95,KBEST+1KNN,0.6556,4000,3229,1591,1KNN,,4000.0,
98 | 96,KBEST+LSVM,0.7134,4000,3229,1591,LSVM,,4000.0,
99 | 97,KBEST+NN100x50,0.7278,4000,3229,1591,NN100x50,,4000.0,
100 | 98,KBEST+NB,0.682,5000,3229,1591,NB,,5000.0,
101 | 99,KBEST+LR,0.7304,5000,3229,1591,LR,,5000.0,
102 | 100,KBEST+5NN,0.6568,5000,3229,1591,5NN,,5000.0,
103 | 101,KBEST+2NN,0.6644,5000,3229,1591,2NN,,5000.0,
104 | 102,KBEST+1KNN,0.6631,5000,3229,1591,1KNN,,5000.0,
105 | 103,KBEST+LSVM,0.7209,5000,3229,1591,LSVM,,5000.0,
106 | 104,KBEST+NN100x50,0.7234,5000,3229,1591,NN100x50,,5000.0,
107 | 105,KBEST+NB,0.6901,6000,3229,1591,NB,,6000.0,
108 | 106,KBEST+LR,0.7461,6000,3229,1591,LR,,6000.0,
109 | 107,KBEST+5NN,0.6518,6000,3229,1591,5NN,,6000.0,
110 | 108,KBEST+2NN,0.6606,6000,3229,1591,2NN,,6000.0,
111 | 109,KBEST+1KNN,0.6606,6000,3229,1591,1KNN,,6000.0,
112 | 110,KBEST+LSVM,0.7253,6000,3229,1591,LSVM,,6000.0,
113 | 111,KBEST+NN100x50,0.7341,6000,3229,1591,NN100x50,,6000.0,
114 | 112,KBEST+NB,0.6914,7000,3229,1591,NB,,7000.0,
115 | 113,KBEST+LR,0.7354,7000,3229,1591,LR,,7000.0,
116 | 114,KBEST+5NN,0.643,7000,3229,1591,5NN,,7000.0,
117 | 115,KBEST+2NN,0.6562,7000,3229,1591,2NN,,7000.0,
118 | 116,KBEST+1KNN,0.6562,7000,3229,1591,1KNN,,7000.0,
119 | 117,KBEST+LSVM,0.7146,7000,3229,1591,LSVM,,7000.0,
120 | 118,KBEST+NN100x50,0.704,7000,3229,1591,NN100x50,,7000.0,
121 | 119,KBEST+NB,0.6914,10000,3229,1591,NB,,10000.0,
122 | 120,KBEST+LR,0.7454,10000,3229,1591,LR,,10000.0,
123 | 121,KBEST+5NN,0.6455,10000,3229,1591,5NN,,10000.0,
124 | 122,KBEST+2NN,0.6662,10000,3229,1591,2NN,,10000.0,
125 | 123,KBEST+1KNN,0.6631,10000,3229,1591,1KNN,,10000.0,
126 | 124,KBEST+LSVM,0.7128,10000,3229,1591,LSVM,,10000.0,
127 | 125,KBEST+NN100x50,0.7423,10000,3229,1591,NN100x50,,10000.0,
128 | 126,KBEST+NB,0.6952,14000,3229,1591,NB,,14000.0,
129 | 127,KBEST+LR,0.7436,14000,3229,1591,LR,,14000.0,
130 | 128,KBEST+5NN,0.6449,14000,3229,1591,5NN,,14000.0,
131 | 129,KBEST+2NN,0.6675,14000,3229,1591,2NN,,14000.0,
132 | 130,KBEST+1KNN,0.6593,14000,3229,1591,1KNN,,14000.0,
133 | 131,KBEST+LSVM,0.7222,14000,3229,1591,LSVM,,14000.0,
134 | 132,KBEST+NN100x50,0.7335,14000,3229,1591,NN100x50,,14000.0,
135 | 133,TOPN+NB,0.687,905,3229,1591,NB,,,5.0
136 | 134,TOPN+LR,0.7335,905,3229,1591,LR,,,5.0
137 | 135,TOPN+5NN,0.682,905,3229,1591,5NN,,,5.0
138 | 136,TOPN+2NN,0.6675,905,3229,1591,2NN,,,5.0
139 | 137,TOPN+1KNN,0.6744,905,3229,1591,1KNN,,,5.0
140 | 138,TOPN+LSVM,0.719,905,3229,1591,LSVM,,,5.0
141 | 139,TOPN+NN100x50,0.7278,905,3229,1591,NN100x50,,,5.0
142 | 140,TOPN+NB,0.6933,1533,3229,1591,NB,,,10.0
143 | 141,TOPN+LR,0.7392,1533,3229,1591,LR,,,10.0
144 | 142,TOPN+5NN,0.6662,1533,3229,1591,5NN,,,10.0
145 | 143,TOPN+2NN,0.6757,1533,3229,1591,2NN,,,10.0
146 | 144,TOPN+1KNN,0.675,1533,3229,1591,1KNN,,,10.0
147 | 145,TOPN+LSVM,0.7285,1533,3229,1591,LSVM,,,10.0
148 | 146,TOPN+NN100x50,0.7417,1533,3229,1591,NN100x50,,,10.0
149 | 147,TOPN+NB,0.6945,2064,3229,1591,NB,,,15.0
150 | 148,TOPN+LR,0.7467,2064,3229,1591,LR,,,15.0
151 | 149,TOPN+5NN,0.6574,2064,3229,1591,5NN,,,15.0
152 | 150,TOPN+2NN,0.6826,2064,3229,1591,2NN,,,15.0
153 | 151,TOPN+1KNN,0.682,2064,3229,1591,1KNN,,,15.0
154 | 152,TOPN+LSVM,0.7285,2064,3229,1591,LSVM,,,15.0
155 | 153,TOPN+NN100x50,0.7536,2064,3229,1591,NN100x50,,,15.0
156 | 154,TOPN+NB,0.692,2598,3229,1591,NB,,,20.0
157 | 155,TOPN+LR,0.7517,2598,3229,1591,LR,,,20.0
158 | 156,TOPN+5NN,0.6656,2598,3229,1591,5NN,,,20.0
159 | 157,TOPN+2NN,0.6794,2598,3229,1591,2NN,,,20.0
160 | 158,TOPN+1KNN,0.6807,2598,3229,1591,1KNN,,,20.0
161 | 159,TOPN+LSVM,0.7278,2598,3229,1591,LSVM,,,20.0
162 | 160,TOPN+NN100x50,0.7511,2598,3229,1591,NN100x50,,,20.0
163 | 161,TOPN+NB,0.6933,3045,3229,1591,NB,,,25.0
164 | 162,TOPN+LR,0.7473,3045,3229,1591,LR,,,25.0
165 | 163,TOPN+5NN,0.6549,3045,3229,1591,5NN,,,25.0
166 | 164,TOPN+2NN,0.6763,3045,3229,1591,2NN,,,25.0
167 | 165,TOPN+1KNN,0.6788,3045,3229,1591,1KNN,,,25.0
168 | 166,TOPN+LSVM,0.7348,3045,3229,1591,LSVM,,,25.0
169 | 167,TOPN+NN100x50,0.7234,3045,3229,1591,NN100x50,,,25.0
170 | 168,TOPN+NB,0.6958,4424,3229,1591,NB,,,50.0
171 | 169,TOPN+LR,0.7511,4424,3229,1591,LR,,,50.0
172 | 170,TOPN+5NN,0.6468,4424,3229,1591,5NN,,,50.0
173 | 171,TOPN+2NN,0.6694,4424,3229,1591,2NN,,,50.0
174 | 172,TOPN+1KNN,0.6782,4424,3229,1591,1KNN,,,50.0
175 | 173,TOPN+LSVM,0.7316,4424,3229,1591,LSVM,,,50.0
176 | 174,TOPN+NN100x50,0.7436,4424,3229,1591,NN100x50,,,50.0
177 | 175,TOPN+NB,0.6914,5595,3229,1591,NB,,,100.0
178 | 176,TOPN+LR,0.7511,5595,3229,1591,LR,,,100.0
179 | 177,TOPN+5NN,0.643,5595,3229,1591,5NN,,,100.0
180 | 178,TOPN+2NN,0.6744,5595,3229,1591,2NN,,,100.0
181 | 179,TOPN+1KNN,0.6763,5595,3229,1591,1KNN,,,100.0
182 | 180,TOPN+LSVM,0.7272,5595,3229,1591,LSVM,,,100.0
183 | 181,TOPN+NN100x50,0.7473,5595,3229,1591,NN100x50,,,100.0
184 | 182,TOPN+NB,0.6958,6404,3229,1591,NB,,,250.0
185 | 183,TOPN+LR,0.7448,6404,3229,1591,LR,,,250.0
186 | 184,TOPN+5NN,0.6449,6404,3229,1591,5NN,,,250.0
187 | 185,TOPN+2NN,0.6725,6404,3229,1591,2NN,,,250.0
188 | 186,TOPN+1KNN,0.6738,6404,3229,1591,1KNN,,,250.0
189 | 187,TOPN+LSVM,0.7304,6404,3229,1591,LSVM,,,250.0
190 | 188,TOPN+NN100x50,0.7542,6404,3229,1591,NN100x50,,,250.0
191 | 189,TOPN+NB,0.6958,6654,3229,1591,NB,,,500.0
192 | 190,TOPN+LR,0.7436,6654,3229,1591,LR,,,500.0
193 | 191,TOPN+5NN,0.6417,6654,3229,1591,5NN,,,500.0
194 | 192,TOPN+2NN,0.6725,6654,3229,1591,2NN,,,500.0
195 | 193,TOPN+1KNN,0.6706,6654,3229,1591,1KNN,,,500.0
196 | 194,TOPN+LSVM,0.7291,6654,3229,1591,LSVM,,,500.0
197 | 195,TOPN+NN100x50,0.7486,6654,3229,1591,NN100x50,,,500.0
198 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/20newsgroups/20NEWSGROUPS_evaluation_results.csv:
--------------------------------------------------------------------------------
1 | ,Method,Accuracy,Number of features,Train size,Test size,Classifier,variance thershold,kbest,top_n
2 | 0,BOW+NB,0.9361,62384,7778,3832,NB,,,
3 | 1,BOW+LR,0.9387,62384,7778,3832,LR,,,
4 | 2,BOW+5NN,0.643,62384,7778,3832,5NN,,,
5 | 3,BOW+2NN,0.7557,62384,7778,3832,2NN,,,
6 | 4,BOW+1KNN,0.7597,62384,7778,3832,1KNN,,,
7 | 5,BOW+LSVM,0.9408,62384,7778,3832,LSVM,,,
8 | 6,BOW+NN100x50,0.9546,62384,7778,3832,NN100x50,,,
9 | 7,BOW+NN500x250,0.959,62384,7778,3832,NN500x250,,,
10 | 8,META+NB,0.9387,14907,7778,3832,NB,,,
11 | 9,META+LR,0.9376,14907,7778,3832,LR,,,
12 | 10,META+5NN,0.6542,14907,7778,3832,5NN,,,
13 | 11,META+2NN,0.7607,14907,7778,3832,2NN,,,
14 | 12,META+1KNN,0.7638,14907,7778,3832,1KNN,,,
15 | 13,META+LSVM,0.9408,14907,7778,3832,LSVM,,,
16 | 14,META+NN100x50,0.952,14907,7778,3832,NN100x50,,,
17 | 15,META+NN500x250,0.9562,14907,7778,3832,NN500x250,,,
18 | 16,LVAR+NB,0.94,29992,7778,3832,NB,0.0005,,
19 | 17,LVAR+LR,0.9384,29992,7778,3832,LR,0.0005,,
20 | 18,LVAR+5NN,0.6341,29992,7778,3832,5NN,0.0005,,
21 | 19,LVAR+2NN,0.7526,29992,7778,3832,2NN,0.0005,,
22 | 20,LVAR+1KNN,0.7466,29992,7778,3832,1KNN,0.0005,,
23 | 21,LVAR+LSVM,0.9368,29992,7778,3832,LSVM,0.0005,,
24 | 22,LVAR+NN100x50,0.9541,29992,7778,3832,NN100x50,0.0005,,
25 | 23,LVAR+NN500x250,0.9609,29992,7778,3832,NN500x250,0.0005,,
26 | 24,LVAR+NB,0.9363,20410,7778,3832,NB,0.001,,
27 | 25,LVAR+LR,0.9358,20410,7778,3832,LR,0.001,,
28 | 26,LVAR+5NN,0.6292,20410,7778,3832,5NN,0.001,,
29 | 27,LVAR+2NN,0.7461,20410,7778,3832,2NN,0.001,,
30 | 28,LVAR+1KNN,0.744,20410,7778,3832,1KNN,0.001,,
31 | 29,LVAR+LSVM,0.9303,20410,7778,3832,LSVM,0.001,,
32 | 30,LVAR+NN100x50,0.9517,20410,7778,3832,NN100x50,0.001,,
33 | 31,LVAR+NN500x250,0.9549,20410,7778,3832,NN500x250,0.001,,
34 | 32,LVAR+NB,0.9306,15779,7778,3832,NB,0.0015,,
35 | 33,LVAR+LR,0.9316,15779,7778,3832,LR,0.0015,,
36 | 34,LVAR+5NN,0.6626,15779,7778,3832,5NN,0.0015,,
37 | 35,LVAR+2NN,0.7445,15779,7778,3832,2NN,0.0015,,
38 | 36,LVAR+1KNN,0.7456,15779,7778,3832,1KNN,0.0015,,
39 | 37,LVAR+LSVM,0.929,15779,7778,3832,LSVM,0.0015,,
40 | 38,LVAR+NN100x50,0.9502,15779,7778,3832,NN100x50,0.0015,,
41 | 39,LVAR+NN500x250,0.9538,15779,7778,3832,NN500x250,0.0015,,
42 | 40,LVAR+NB,0.9282,13521,7778,3832,NB,0.002,,
43 | 41,LVAR+LR,0.9308,13521,7778,3832,LR,0.002,,
44 | 42,LVAR+5NN,0.6827,13521,7778,3832,5NN,0.002,,
45 | 43,LVAR+2NN,0.7435,13521,7778,3832,2NN,0.002,,
46 | 44,LVAR+1KNN,0.7456,13521,7778,3832,1KNN,0.002,,
47 | 45,LVAR+LSVM,0.9251,13521,7778,3832,LSVM,0.002,,
48 | 46,LVAR+NN100x50,0.9434,13521,7778,3832,NN100x50,0.002,,
49 | 47,LVAR+NN500x250,0.9512,13521,7778,3832,NN500x250,0.002,,
50 | 48,LVAR+NB,0.9212,10469,7778,3832,NB,0.003,,
51 | 49,LVAR+LR,0.9277,10469,7778,3832,LR,0.003,,
52 | 50,LVAR+5NN,0.6699,10469,7778,3832,5NN,0.003,,
53 | 51,LVAR+2NN,0.7469,10469,7778,3832,2NN,0.003,,
54 | 52,LVAR+1KNN,0.7479,10469,7778,3832,1KNN,0.003,,
55 | 53,LVAR+LSVM,0.9204,10469,7778,3832,LSVM,0.003,,
56 | 54,LVAR+NN100x50,0.9442,10469,7778,3832,NN100x50,0.003,,
57 | 55,LVAR+NN500x250,0.9489,10469,7778,3832,NN500x250,0.003,,
58 | 56,LVAR+NB,0.9152,8787,7778,3832,NB,0.004,,
59 | 57,LVAR+LR,0.9217,8787,7778,3832,LR,0.004,,
60 | 58,LVAR+5NN,0.6508,8787,7778,3832,5NN,0.004,,
61 | 59,LVAR+2NN,0.7427,8787,7778,3832,2NN,0.004,,
62 | 60,LVAR+1KNN,0.7477,8787,7778,3832,1KNN,0.004,,
63 | 61,LVAR+LSVM,0.9113,8787,7778,3832,LSVM,0.004,,
64 | 62,LVAR+NN100x50,0.9241,8787,7778,3832,NN100x50,0.004,,
65 | 63,LVAR+NN500x250,0.9431,8787,7778,3832,NN500x250,0.004,,
66 | 64,LVAR+NB,0.9108,7651,7778,3832,NB,0.005,,
67 | 65,LVAR+LR,0.9186,7651,7778,3832,LR,0.005,,
68 | 66,LVAR+5NN,0.6647,7651,7778,3832,5NN,0.005,,
69 | 67,LVAR+2NN,0.7445,7651,7778,3832,2NN,0.005,,
70 | 68,LVAR+1KNN,0.7461,7651,7778,3832,1KNN,0.005,,
71 | 69,LVAR+LSVM,0.9094,7651,7778,3832,LSVM,0.005,,
72 | 70,LVAR+NN100x50,0.934,7651,7778,3832,NN100x50,0.005,,
73 | 71,LVAR+NN500x250,0.9415,7651,7778,3832,NN500x250,0.005,,
74 | 72,LVAR+NB,0.8977,4880,7778,3832,NB,0.01,,
75 | 73,LVAR+LR,0.9076,4880,7778,3832,LR,0.01,,
76 | 74,LVAR+5NN,0.6942,4880,7778,3832,5NN,0.01,,
77 | 75,LVAR+2NN,0.7484,4880,7778,3832,2NN,0.01,,
78 | 76,LVAR+1KNN,0.7474,4880,7778,3832,1KNN,0.01,,
79 | 77,LVAR+LSVM,0.8883,4880,7778,3832,LSVM,0.01,,
80 | 78,LVAR+NN100x50,0.916,4880,7778,3832,NN100x50,0.01,,
81 | 79,LVAR+NN500x250,0.9233,4880,7778,3832,NN500x250,0.01,,
82 | 80,KBEST+NB,0.8142,1000,7778,3832,NB,,1000.0,
83 | 81,KBEST+LR,0.8411,1000,7778,3832,LR,,1000.0,
84 | 82,KBEST+5NN,0.7109,1000,7778,3832,5NN,,1000.0,
85 | 83,KBEST+2NN,0.7401,1000,7778,3832,2NN,,1000.0,
86 | 84,KBEST+1KNN,0.738,1000,7778,3832,1KNN,,1000.0,
87 | 85,KBEST+LSVM,0.8419,1000,7778,3832,LSVM,,1000.0,
88 | 86,KBEST+NN100x50,0.8398,1000,7778,3832,NN100x50,,1000.0,
89 | 87,KBEST+NN500x250,0.858,1000,7778,3832,NN500x250,,1000.0,
90 | 88,KBEST+NB,0.852,2000,7778,3832,NB,,2000.0,
91 | 89,KBEST+LR,0.8805,2000,7778,3832,LR,,2000.0,
92 | 90,KBEST+5NN,0.7049,2000,7778,3832,5NN,,2000.0,
93 | 91,KBEST+2NN,0.7557,2000,7778,3832,2NN,,2000.0,
94 | 92,KBEST+1KNN,0.7576,2000,7778,3832,1KNN,,2000.0,
95 | 93,KBEST+LSVM,0.8638,2000,7778,3832,LSVM,,2000.0,
96 | 94,KBEST+NN100x50,0.8779,2000,7778,3832,NN100x50,,2000.0,
97 | 95,KBEST+NN500x250,0.8847,2000,7778,3832,NN500x250,,2000.0,
98 | 96,KBEST+NB,0.8706,3000,7778,3832,NB,,3000.0,
99 | 97,KBEST+LR,0.8935,3000,7778,3832,LR,,3000.0,
100 | 98,KBEST+5NN,0.6965,3000,7778,3832,5NN,,3000.0,
101 | 99,KBEST+2NN,0.757,3000,7778,3832,2NN,,3000.0,
102 | 100,KBEST+1KNN,0.7615,3000,7778,3832,1KNN,,3000.0,
103 | 101,KBEST+LSVM,0.8758,3000,7778,3832,LSVM,,3000.0,
104 | 102,KBEST+NN100x50,0.8948,3000,7778,3832,NN100x50,,3000.0,
105 | 103,KBEST+NN500x250,0.9014,3000,7778,3832,NN500x250,,3000.0,
106 | 104,KBEST+NB,0.9001,5000,7778,3832,NB,,5000.0,
107 | 105,KBEST+LR,0.9084,5000,7778,3832,LR,,5000.0,
108 | 106,KBEST+5NN,0.721,5000,7778,3832,5NN,,5000.0,
109 | 107,KBEST+2NN,0.7565,5000,7778,3832,2NN,,5000.0,
110 | 108,KBEST+1KNN,0.7557,5000,7778,3832,1KNN,,5000.0,
111 | 109,KBEST+LSVM,0.8922,5000,7778,3832,LSVM,,5000.0,
112 | 110,KBEST+NN100x50,0.9154,5000,7778,3832,NN100x50,,5000.0,
113 | 111,KBEST+NN500x250,0.9272,5000,7778,3832,NN500x250,,5000.0,
114 | 112,KBEST+NB,0.9194,10000,7778,3832,NB,,10000.0,
115 | 113,KBEST+LR,0.9251,10000,7778,3832,LR,,10000.0,
116 | 114,KBEST+5NN,0.6803,10000,7778,3832,5NN,,10000.0,
117 | 115,KBEST+2NN,0.7516,10000,7778,3832,2NN,,10000.0,
118 | 116,KBEST+1KNN,0.7537,10000,7778,3832,1KNN,,10000.0,
119 | 117,KBEST+LSVM,0.9165,10000,7778,3832,LSVM,,10000.0,
120 | 118,KBEST+NN100x50,0.9405,10000,7778,3832,NN100x50,,10000.0,
121 | 119,KBEST+NN500x250,0.9444,10000,7778,3832,NN500x250,,10000.0,
122 | 120,KBEST+NB,0.9293,15000,7778,3832,NB,,15000.0,
123 | 121,KBEST+LR,0.9327,15000,7778,3832,LR,,15000.0,
124 | 122,KBEST+5NN,0.696,15000,7778,3832,5NN,,15000.0,
125 | 123,KBEST+2NN,0.7505,15000,7778,3832,2NN,,15000.0,
126 | 124,KBEST+1KNN,0.7466,15000,7778,3832,1KNN,,15000.0,
127 | 125,KBEST+LSVM,0.9241,15000,7778,3832,LSVM,,15000.0,
128 | 126,KBEST+NN100x50,0.9442,15000,7778,3832,NN100x50,,15000.0,
129 | 127,KBEST+NN500x250,0.9517,15000,7778,3832,NN500x250,,15000.0,
130 | 128,KBEST+NB,0.9332,20000,7778,3832,NB,,20000.0,
131 | 129,KBEST+LR,0.9353,20000,7778,3832,LR,,20000.0,
132 | 130,KBEST+5NN,0.7129,20000,7778,3832,5NN,,20000.0,
133 | 131,KBEST+2NN,0.7544,20000,7778,3832,2NN,,20000.0,
134 | 132,KBEST+1KNN,0.7537,20000,7778,3832,1KNN,,20000.0,
135 | 133,KBEST+LSVM,0.9327,20000,7778,3832,LSVM,,20000.0,
136 | 134,KBEST+NN100x50,0.9502,20000,7778,3832,NN100x50,,20000.0,
137 | 135,KBEST+NN500x250,0.9528,20000,7778,3832,NN500x250,,20000.0,
138 | 136,KBEST+NB,0.9374,25000,7778,3832,NB,,25000.0,
139 | 137,KBEST+LR,0.9384,25000,7778,3832,LR,,25000.0,
140 | 138,KBEST+5NN,0.697,25000,7778,3832,5NN,,25000.0,
141 | 139,KBEST+2NN,0.7544,25000,7778,3832,2NN,,25000.0,
142 | 140,KBEST+1KNN,0.7516,25000,7778,3832,1KNN,,25000.0,
143 | 141,KBEST+LSVM,0.9348,25000,7778,3832,LSVM,,25000.0,
144 | 142,KBEST+NN100x50,0.9556,25000,7778,3832,NN100x50,,25000.0,
145 | 143,KBEST+NN500x250,0.9538,25000,7778,3832,NN500x250,,25000.0,
146 | 144,KBEST+NB,0.9382,30000,7778,3832,NB,,30000.0,
147 | 145,KBEST+LR,0.9389,30000,7778,3832,LR,,30000.0,
148 | 146,KBEST+5NN,0.6464,30000,7778,3832,5NN,,30000.0,
149 | 147,KBEST+2NN,0.7521,30000,7778,3832,2NN,,30000.0,
150 | 148,KBEST+1KNN,0.7458,30000,7778,3832,1KNN,,30000.0,
151 | 149,KBEST+LSVM,0.9366,30000,7778,3832,LSVM,,30000.0,
152 | 150,KBEST+NN100x50,0.9564,30000,7778,3832,NN100x50,,30000.0,
153 | 151,KBEST+NN500x250,0.958,30000,7778,3832,NN500x250,,30000.0,
154 | 152,TOPN+NB,0.7996,982,7778,3832,NB,,,5.0
155 | 153,TOPN+LR,0.8233,982,7778,3832,LR,,,5.0
156 | 154,TOPN+5NN,0.6644,982,7778,3832,5NN,,,5.0
157 | 155,TOPN+2NN,0.7174,982,7778,3832,2NN,,,5.0
158 | 156,TOPN+1KNN,0.7231,982,7778,3832,1KNN,,,5.0
159 | 157,TOPN+LSVM,0.7871,982,7778,3832,LSVM,,,5.0
160 | 158,TOPN+NN100x50,0.8233,982,7778,3832,NN100x50,,,5.0
161 | 159,TOPN+NN500x250,0.8463,982,7778,3832,NN500x250,,,5.0
162 | 160,TOPN+NB,0.8854,2926,7778,3832,NB,,,10.0
163 | 161,TOPN+LR,0.9006,2926,7778,3832,LR,,,10.0
164 | 162,TOPN+5NN,0.7169,2926,7778,3832,5NN,,,10.0
165 | 163,TOPN+2NN,0.7667,2926,7778,3832,2NN,,,10.0
166 | 164,TOPN+1KNN,0.7735,2926,7778,3832,1KNN,,,10.0
167 | 165,TOPN+LSVM,0.8802,2926,7778,3832,LSVM,,,10.0
168 | 166,TOPN+NN100x50,0.9134,2926,7778,3832,NN100x50,,,10.0
169 | 167,TOPN+NN500x250,0.9196,2926,7778,3832,NN500x250,,,10.0
170 | 168,TOPN+NB,0.9058,5096,7778,3832,NB,,,15.0
171 | 169,TOPN+LR,0.917,5096,7778,3832,LR,,,15.0
172 | 170,TOPN+5NN,0.6986,5096,7778,3832,5NN,,,15.0
173 | 171,TOPN+2NN,0.7654,5096,7778,3832,2NN,,,15.0
174 | 172,TOPN+1KNN,0.7706,5096,7778,3832,1KNN,,,15.0
175 | 173,TOPN+LSVM,0.9045,5096,7778,3832,LSVM,,,15.0
176 | 174,TOPN+NN100x50,0.9314,5096,7778,3832,NN100x50,,,15.0
177 | 175,TOPN+NN500x250,0.9337,5096,7778,3832,NN500x250,,,15.0
178 | 176,TOPN+NB,0.9196,7548,7778,3832,NB,,,20.0
179 | 177,TOPN+LR,0.9269,7548,7778,3832,LR,,,20.0
180 | 178,TOPN+5NN,0.7059,7548,7778,3832,5NN,,,20.0
181 | 179,TOPN+2NN,0.7664,7548,7778,3832,2NN,,,20.0
182 | 180,TOPN+1KNN,0.7704,7548,7778,3832,1KNN,,,20.0
183 | 181,TOPN+LSVM,0.917,7548,7778,3832,LSVM,,,20.0
184 | 182,TOPN+NN100x50,0.9434,7548,7778,3832,NN100x50,,,20.0
185 | 183,TOPN+NN500x250,0.9468,7548,7778,3832,NN500x250,,,20.0
186 | 184,TOPN+NB,0.9269,10149,7778,3832,NB,,,25.0
187 | 185,TOPN+LR,0.9311,10149,7778,3832,LR,,,25.0
188 | 186,TOPN+5NN,0.7171,10149,7778,3832,5NN,,,25.0
189 | 187,TOPN+2NN,0.7701,10149,7778,3832,2NN,,,25.0
190 | 188,TOPN+1KNN,0.7756,10149,7778,3832,1KNN,,,25.0
191 | 189,TOPN+LSVM,0.9277,10149,7778,3832,LSVM,,,25.0
192 | 190,TOPN+NN100x50,0.947,10149,7778,3832,NN100x50,,,25.0
193 | 191,TOPN+NN500x250,0.9528,10149,7778,3832,NN500x250,,,25.0
194 | 192,TOPN+NB,0.9421,20942,7778,3832,NB,,,50.0
195 | 193,TOPN+LR,0.9384,20942,7778,3832,LR,,,50.0
196 | 194,TOPN+5NN,0.7192,20942,7778,3832,5NN,,,50.0
197 | 195,TOPN+2NN,0.7644,20942,7778,3832,2NN,,,50.0
198 | 196,TOPN+1KNN,0.7717,20942,7778,3832,1KNN,,,50.0
199 | 197,TOPN+LSVM,0.9379,20942,7778,3832,LSVM,,,50.0
200 | 198,TOPN+NN100x50,0.9554,20942,7778,3832,NN100x50,,,50.0
201 | 199,TOPN+NN500x250,0.9509,20942,7778,3832,NN500x250,,,50.0
202 | 200,TOPN+NB,0.9402,30793,7778,3832,NB,,,100.0
203 | 201,TOPN+LR,0.9402,30793,7778,3832,LR,,,100.0
204 | 202,TOPN+5NN,0.6934,30793,7778,3832,5NN,,,100.0
205 | 203,TOPN+2NN,0.7568,30793,7778,3832,2NN,,,100.0
206 | 204,TOPN+1KNN,0.7649,30793,7778,3832,1KNN,,,100.0
207 | 205,TOPN+LSVM,0.9384,30793,7778,3832,LSVM,,,100.0
208 | 206,TOPN+NN100x50,0.9575,30793,7778,3832,NN100x50,,,100.0
209 | 207,TOPN+NN500x250,0.9603,30793,7778,3832,NN500x250,,,100.0
210 | 208,TOPN+NB,0.9415,37281,7778,3832,NB,,,250.0
211 | 209,TOPN+LR,0.9402,37281,7778,3832,LR,,,250.0
212 | 210,TOPN+5NN,0.6908,37281,7778,3832,5NN,,,250.0
213 | 211,TOPN+2NN,0.755,37281,7778,3832,2NN,,,250.0
214 | 212,TOPN+1KNN,0.763,37281,7778,3832,1KNN,,,250.0
215 | 213,TOPN+LSVM,0.9392,37281,7778,3832,LSVM,,,250.0
216 | 214,TOPN+NN100x50,0.9551,37281,7778,3832,NN100x50,,,250.0
217 | 215,TOPN+NN500x250,0.9616,37281,7778,3832,NN500x250,,,250.0
218 | 216,TOPN+NB,0.9397,39694,7778,3832,NB,,,500.0
219 | 217,TOPN+LR,0.94,39694,7778,3832,LR,,,500.0
220 | 218,TOPN+5NN,0.6895,39694,7778,3832,5NN,,,500.0
221 | 219,TOPN+2NN,0.7547,39694,7778,3832,2NN,,,500.0
222 | 220,TOPN+1KNN,0.7625,39694,7778,3832,1KNN,,,500.0
223 | 221,TOPN+LSVM,0.9392,39694,7778,3832,LSVM,,,500.0
224 | 222,TOPN+NN100x50,0.9572,39694,7778,3832,NN100x50,,,500.0
225 | 223,TOPN+NN500x250,0.9577,39694,7778,3832,NN500x250,,,500.0
226 |
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation.py:
--------------------------------------------------------------------------------
1 | from sklearn.model_selection import train_test_split
2 | from sklearn.metrics import accuracy_score
3 | from sklearn.feature_extraction.text import CountVectorizer
4 | from sklearn.feature_extraction import DictVectorizer
5 | from sklearn.feature_selection import VarianceThreshold
6 | from sklearn.feature_selection import SelectKBest
7 | from sklearn.feature_selection import chi2
8 | from sklearn.feature_selection import SelectFromModel
9 | from sklearn.svm import LinearSVC
10 | from collections import Counter
11 | from GraphOfDocs import select
12 | from GraphOfDocs import config_experiments
13 | import seaborn as sns
14 | import matplotlib.pyplot as plt
15 |
def benchmark_classifier(clf, X_train, y_train, X_test, y_test, round_digits = 4):
    """Fit ``clf`` on the training split and score it on the test split.

    Args:
        clf: estimator exposing ``fit`` and ``predict``.
        X_train, y_train: training features and labels passed to ``fit``.
        X_test, y_test: test features passed to ``predict`` and the labels
            the predictions are compared against.
        round_digits: number of decimals the accuracy is rounded to.

    Returns:
        Tuple ``(clf, accuracy)``: the same estimator object after fitting
        and the rounded ``accuracy_score`` of its test predictions.
    """
    clf.fit(X_train, y_train)
    predictions = clf.predict(X_test)
    return clf, round(accuracy_score(y_test, predictions), round_digits)
22 |
def generate_plots(df, show_only = True, output_dir = '', plots_prefix = 'plot'):
    """Draw one accuracy-vs-number-of-features line plot per classifier.

    Args:
        df: DataFrame with at least the columns 'Classifier', 'Method',
            'Number of features' and 'Accuracy'.
        show_only: when True each plot is displayed with ``plt.show()``;
            when False two PNG files are written per classifier, one with
            the automatic y-axis and one with the y-axis fixed to [0, 1]
            (suffix ``_0_1``).
        output_dir: directory the PNG files are written to. An empty string
            means the current working directory.
        plots_prefix: file name prefix of the written PNG files.
    """
    # Local import keeps this block self-contained.
    import os

    def draw(data):
        # One line per 'Method', distinguished by both colour and marker.
        return sns.lineplot(
            x = "Number of features",
            y = "Accuracy",
            hue = "Method",
            style = "Method",
            markers = True,
            dashes = False,
            data = data
        )

    for classifier_name in list(df['Classifier'].unique()):
        classifier_rows = df[df['Classifier'] == classifier_name]

        if show_only:
            draw(classifier_rows)
            plt.show()
            continue

        # os.path.join avoids producing '/plot_X.png' (filesystem root)
        # when output_dir is left at its default empty value.
        base_path = os.path.join(output_dir, f'{plots_prefix}_{classifier_name}')

        draw(classifier_rows)
        plt.savefig(f'{base_path}.png', dpi = 100)
        plt.clf()

        # Same plot again with a fixed accuracy axis.
        draw(classifier_rows)
        plt.ylim(0, 1)
        plt.savefig(f'{base_path}_0_1.png', dpi = 100)
        plt.clf()
49 |
class GraphOfDocsClassifier:
    """Classifies a document by majority vote of the other documents in its community."""

    def __init__(self, doc_to_community_dict, doc_communities_dict,
                 test_size = 0.33, random_state = 42):
        # doc_to_community_dict: document identifier -> community id.
        # doc_communities_dict: community id -> iterable of document identifiers.
        self.__doc_to_community_dict = doc_to_community_dict
        self.__doc_communities_dict = doc_communities_dict
        self.__test_size = test_size
        self.__random_state = random_state

    def calculate_accuracy(self, document_identifiers, results_table):
        """Score the community majority vote on the held-out documents.

        The identifiers are split with ``train_test_split`` and only the test
        part is used. For each test document the predicted class is the most
        common class among the other documents of its community. The rounded
        accuracy is appended to ``results_table`` as a new row; nothing is
        returned.
        """
        held_out = list(train_test_split(
            document_identifiers,
            test_size = self.__test_size,
            random_state = self.__random_state)[1])

        expected = []
        predicted = []
        for identifier in held_out:
            members = self.__doc_communities_dict[
                self.__doc_to_community_dict[identifier]]
            # The document itself is excluded from the vote.
            votes = Counter(
                config_experiments.extract_file_class(member)
                for member in members if member != identifier)
            actual = config_experiments.extract_file_class(identifier)
            winner = votes.most_common(1)[0][0]
            expected.append(actual)
            predicted.append(winner)

        accuracy = round(accuracy_score(expected, predicted), 4)
        results_table.add_row(['Graph-of-docs Classifier',
                               accuracy, 'N/A', 'N/A', len(held_out), ''])
79 |
class Evaluator:
    """Base class of all evaluators; stores the split configuration."""

    def __init__(self, test_size = 0.33, random_state = 42):
        self._test_size = test_size
        self._random_state = random_state

    def evaluate(self, x, y, **kwargs):
        """Run the evaluation; must be overridden by subclasses.

        Raises:
            NotImplementedError: always, in this base class.
        """
        # NotImplemented is a non-callable constant; NotImplementedError is
        # the exception intended for abstract methods.
        raise NotImplementedError('pure virtual')

    def _collect_evaluation_results(self, x_train_transformed, y_train,
                                    x_test_transformed, y_test, results_table,
                                    classifiers, method_prefix, extra_details = None):
        """Benchmark every classifier and record the outcome.

        Args:
            x_train_transformed, x_test_transformed: feature matrices; their
                ``shape`` provides the train/test sizes and feature count.
            y_train, y_test: labels for the two splits.
            results_table: object with ``add_row``; receives one row per
                classifier.
            classifiers: iterable of ``(name, estimator)`` pairs.
            method_prefix: string prepended to the classifier name to form
                the 'Method' value.
            extra_details: optional dict merged into every result dict and
                rendered with ``str`` into the table row.

        Returns:
            List with one result dict per classifier.
        """
        # Avoid a shared mutable default argument.
        details = {} if extra_details is None else extra_details
        train_size = x_train_transformed.shape[0]
        test_size = x_test_transformed.shape[0]
        number_of_features = x_test_transformed.shape[1]

        evaluation_results = []
        for name, estimator in ((entry[0], entry[1]) for entry in classifiers):
            _, accuracy = benchmark_classifier(
                estimator, x_train_transformed, y_train,
                x_test_transformed, y_test)
            method = method_prefix + name
            results_table.add_row([method, accuracy,
                                   number_of_features,
                                   train_size, test_size,
                                   str(details)])
            classifier_results = {
                'Method': method,
                'Accuracy': accuracy,
                'Number of features': number_of_features,
                'Train size': train_size,
                'Test size': test_size,
            }
            classifier_results.update(details)
            classifier_results.update({'Classifier': name})
            evaluation_results.append(classifier_results)
        return evaluation_results
114 |
class BOWEvaluator(Evaluator):
    """Evaluates classifiers on plain ``CountVectorizer`` features."""

    def __init__(self, test_size = 0.33, random_state = 42):
        super().__init__(test_size, random_state)

    def evaluate(self, x, y, **kwargs):
        """Split, vectorize and benchmark.

        Expects ``results_table`` and ``classifiers`` in ``kwargs`` and
        returns the list produced by ``_collect_evaluation_results``.
        """
        split = train_test_split(x, y, test_size = self._test_size,
                                 random_state = self._random_state)
        x_train, x_test, y_train, y_test = split

        vectorizer = CountVectorizer()
        # The vocabulary is learned from the training documents only.
        x_train_transformed = vectorizer.fit_transform(x_train)
        print(f'Number of features in BOWEvaluator: {x_train_transformed.shape[1]}')
        x_test_transformed = vectorizer.transform(x_test)

        return self._collect_evaluation_results(
            x_train_transformed, y_train, x_test_transformed, y_test,
            kwargs['results_table'], kwargs['classifiers'],
            method_prefix = 'BOW+')
133 |
class MetaFeatureSelectionEvaluator(Evaluator):
    """Evaluates classifiers on features kept by ``SelectFromModel``."""

    def __init__(self, estimator_model = LinearSVC,
                 test_size = 0.33, random_state = 42):
        super().__init__(test_size, random_state)
        # A callable returning a fresh estimator; it is invoked with no
        # arguments on every evaluate() call.
        self.__estimator_model = estimator_model

    def evaluate(self, x, y, **kwargs):
        """Split, vectorize, select features with the model and benchmark.

        Expects ``results_table`` and ``classifiers`` in ``kwargs`` and
        returns the list produced by ``_collect_evaluation_results``.
        """
        split = train_test_split(x, y,
                                 test_size = self._test_size,
                                 random_state = self._random_state)
        x_train, x_test, y_train, y_test = split

        vectorizer = CountVectorizer()
        train_counts = vectorizer.fit_transform(x_train)
        test_counts = vectorizer.transform(x_test)

        # The selector is fitted on the training split only.
        selector = SelectFromModel(estimator = self.__estimator_model())
        train_selected = selector.fit_transform(train_counts, y_train)
        test_selected = selector.transform(test_counts)

        return self._collect_evaluation_results(
            train_selected, y_train, test_selected, y_test,
            kwargs['results_table'], kwargs['classifiers'],
            method_prefix = 'META+')
157 |
class LowVarianceFeatureSelectionEvaluator(Evaluator):
    """Evaluates classifiers after ``VarianceThreshold`` feature selection."""

    def __init__(self, variance_threshold, test_size = 0.33, random_state = 42):
        super().__init__(test_size, random_state)
        self.__variance_threshold = variance_threshold

    def evaluate(self, x, y, **kwargs):
        """Split, vectorize, apply the variance threshold and benchmark.

        Expects ``results_table`` and ``classifiers`` in ``kwargs``. The
        threshold in use is attached to every result as an extra detail.
        """
        split = train_test_split(x, y,
                                 test_size = self._test_size,
                                 random_state = self._random_state)
        x_train, x_test, y_train, y_test = split

        vectorizer = CountVectorizer()
        train_counts = vectorizer.fit_transform(x_train)
        test_counts = vectorizer.transform(x_test)

        # The selector is fitted on the training split only.
        selector = VarianceThreshold(threshold = self.__variance_threshold)
        train_selected = selector.fit_transform(train_counts, y_train)
        test_selected = selector.transform(test_counts)

        results_table = kwargs['results_table']
        classifiers = kwargs['classifiers']
        # The key spelling is part of the emitted results and is kept as is.
        details = {'variance thershold': self.__variance_threshold}
        return self._collect_evaluation_results(
            train_selected, y_train, test_selected, y_test,
            results_table, classifiers, method_prefix = 'LVAR+',
            extra_details = details)
184 |
class SelectKBestFeatureSelectionEvaluator(Evaluator):
    """Evaluates classifiers on the ``kbest`` features ranked by chi2."""

    def __init__(self, kbest, test_size = 0.33, random_state = 42):
        super().__init__(test_size, random_state)
        self.__kbest = kbest

    def evaluate(self, x, y, **kwargs):
        """Split, vectorize, keep the k best features and benchmark.

        Expects ``results_table`` and ``classifiers`` in ``kwargs``. The
        value of k is attached to every result as an extra detail.
        """
        split = train_test_split(x, y,
                                 test_size = self._test_size,
                                 random_state = self._random_state)
        x_train, x_test, y_train, y_test = split

        vectorizer = CountVectorizer()
        train_counts = vectorizer.fit_transform(x_train)
        test_counts = vectorizer.transform(x_test)

        # The selector is fitted on the training split only.
        selector = SelectKBest(chi2, k = self.__kbest)
        train_selected = selector.fit_transform(train_counts, y_train)
        test_selected = selector.transform(test_counts)

        results_table = kwargs['results_table']
        classifiers = kwargs['classifiers']
        details = {'kbest': self.__kbest}
        return self._collect_evaluation_results(
            train_selected, y_train, test_selected, y_test,
            results_table, classifiers, method_prefix = 'KBEST+',
            extra_details = details)
211 |
class BigramsExtractionEvaluator(Evaluator):
    """Evaluates classifiers on bigram-only ``CountVectorizer`` features."""

    def __init__(self, test_size = 0.33, random_state = 42):
        super().__init__(test_size, random_state)

    def evaluate(self, x, y, **kwargs):
        """Split, vectorize with ``ngram_range=(2, 2)`` and benchmark.

        Expects ``results_table`` and ``classifiers`` in ``kwargs`` and
        returns the list produced by ``_collect_evaluation_results``.
        """
        split = train_test_split(
            x, y, test_size = self._test_size,
            random_state = self._random_state)
        x_train, x_test, y_train, y_test = split

        vectorizer = CountVectorizer(ngram_range = (2, 2))
        train_counts = vectorizer.fit_transform(x_train)
        test_counts = vectorizer.transform(x_test)

        return self._collect_evaluation_results(
            train_counts, y_train, test_counts, y_test,
            kwargs['results_table'], kwargs['classifiers'],
            method_prefix = 'BI+')
230 |
class BigramsExtractionAndSelectKBestFeatureSelectionEvaluator(Evaluator):
    """Evaluates classifiers on the ``kbest`` bigram features ranked by chi2."""

    def __init__(self, kbest, test_size = 0.33, random_state = 42):
        super().__init__(test_size, random_state)
        self.__kbest = kbest

    def evaluate(self, x, y, **kwargs):
        """Split, extract bigrams, keep the k best of them and benchmark.

        Expects ``results_table`` and ``classifiers`` in ``kwargs``. The
        value of k is attached to every result as an extra detail.
        """
        split = train_test_split(x, y,
                                 test_size = self._test_size,
                                 random_state = self._random_state)
        x_train, x_test, y_train, y_test = split

        vectorizer = CountVectorizer(ngram_range = (2, 2))
        train_counts = vectorizer.fit_transform(x_train)
        test_counts = vectorizer.transform(x_test)

        # The selector is fitted on the training split only.
        selector = SelectKBest(chi2, k = self.__kbest)
        train_selected = selector.fit_transform(train_counts, y_train)
        test_selected = selector.transform(test_counts)

        results_table = kwargs['results_table']
        classifiers = kwargs['classifiers']
        details = {'kbest': self.__kbest}
        return self._collect_evaluation_results(
            train_selected, y_train, test_selected, y_test,
            results_table, classifiers, method_prefix = 'BI+KBEST+',
            extra_details = details)
256 |
class TopNOfEachCommunityEvaluator(Evaluator):
    """Evaluates classifiers on a vocabulary made of top community terms."""

    def __init__(self, top_n, doc_to_community_dict,
                 doc_communities_dict, test_size = 0.33, random_state = 42):
        Evaluator.__init__(self, test_size, random_state)
        self.__top_n = top_n
        # doc_to_community_dict: document identifier -> community id.
        self.__doc_to_community_dict = doc_to_community_dict
        self.__doc_communities_dict = doc_communities_dict

    def evaluate(self, x, y, **kwargs):
        """Build the community-term vocabulary, vectorize and benchmark.

        Expects in ``kwargs``: ``df`` (DataFrame with an 'identifier'
        column), ``positions_train`` (row positions of the training
        documents in ``df``), ``database``, ``results_table`` and
        ``classifiers``.

        Returns:
            The list produced by ``_collect_evaluation_results``; every
            result carries the ``top_n`` value as an extra detail.
        """
        df = kwargs['df']
        positions_train = kwargs['positions_train']
        train_docs = list(df.iloc[positions_train]['identifier'])
        database = kwargs['database']

        community_id_to_tags = select.get_communities_tags(
            database, top_terms = self.__top_n)
        # Sorting makes the feature column order reproducible across runs;
        # iterating a set of strings depends on per-process hash randomization.
        vocabulary = sorted({
            word
            for doc in train_docs
            for word in community_id_to_tags[self.__doc_to_community_dict[doc]]
        })

        # NOTE(review): the vocabulary comes from ``positions_train`` while the
        # split below is recomputed here; presumably both describe the same
        # training documents. Verify against the caller.
        x_train, x_test, y_train, \
            y_test = train_test_split(x, y,
                                      test_size = self._test_size,
                                      random_state = self._random_state)
        cv = CountVectorizer(vocabulary = vocabulary)
        x_train_transformed = cv.fit_transform(x_train, y_train)
        x_test_transformed = cv.transform(x_test)

        results_table = kwargs['results_table']
        classifiers = kwargs['classifiers']
        extra_details = {
            'top_n': self.__top_n
        }
        return self._collect_evaluation_results(
            x_train_transformed, y_train, x_test_transformed,
            y_test, results_table, classifiers, 'TOPN+',
            extra_details = extra_details)
293 |
294 | # Ignore this class for the AIAI paper. Future work.
class Docs2ComEvaluator(Evaluator):
    """Trains on one pseudo-document per community and tests on real documents."""

    def __init__(self, top_n, doc_to_community_dict,
                 doc_communities_dict, label_encoder, test_size = 0.33, random_state = 42):
        Evaluator.__init__(self, test_size, random_state)
        self.__top_n = top_n
        # doc_to_community_dict: document identifier -> community id.
        # doc_communities_dict: community id -> iterable of document identifiers.
        self.__doc_to_community_dict = doc_to_community_dict
        self.__doc_communities_dict = doc_communities_dict
        self.__label_encoder = label_encoder

    # [tag1, tag2, ... tagN] -> class (done for each community of docs)
    # TODO: Clean up this method.
    def evaluate(self, x, y, **kwargs):
        """Benchmark classifiers trained on community tags.

        Each community that contains a training document becomes one training
        sample: its text is the space-joined top tags of the community and its
        label is the most common class of the community's non-test documents.
        Test samples are the test documents reduced to their unique words.

        Expects in ``kwargs``: ``df`` (DataFrame with 'identifier', 'text'
        and 'class_number' columns), ``positions_train``, ``positions_test``,
        ``database``, ``results_table`` and ``classifiers``.

        Returns:
            The list produced by ``_collect_evaluation_results``; every
            result carries the ``top_n`` value as an extra detail.
        """
        df = kwargs['df']
        positions_train = kwargs['positions_train']
        positions_test = kwargs['positions_test']
        train_docs = list(df.iloc[positions_train]['identifier'])
        test_docs = list(df.iloc[positions_test]['identifier'])
        database = kwargs['database']
        unique_community_ids = list(set([self.__doc_to_community_dict[doc]
                                         for doc in train_docs]))

        # Set membership keeps the per-community filter linear instead of
        # scanning the whole test list for every community document.
        test_doc_set = set(test_docs)

        communities_y = []
        communities_tags = []
        for community_id in unique_community_ids:
            # Find the most common class among the non-test documents.
            community_docs = self.__doc_communities_dict[community_id]
            classes = [config_experiments.extract_file_class(doc)
                       for doc in community_docs if doc not in test_doc_set]
            classified_class = Counter(classes).most_common(1)[0][0]
            communities_y.append(classified_class)
            # Get the most important tags of each community.
            communities_tags.append(' '.join(
                select.get_community_tags(
                    database, community_id, top_terms = self.__top_n)))

        cv = CountVectorizer()
        x_transformed = cv.fit_transform(communities_tags)
        communities_y_encoded = self.__label_encoder.transform(communities_y)

        # Filter once so texts and labels come from the same rows, in the
        # same (DataFrame) order.
        test_rows = df[df['identifier'].isin(test_docs)]
        x_test_docs = [' '.join(list(set(doc.split())))
                       for doc in list(test_rows['text'])]
        x_test_docs_transformed = cv.transform(x_test_docs)
        y_test = list(test_rows['class_number'])

        results_table = kwargs['results_table']
        classifiers = kwargs['classifiers']
        extra_details = {
            'top_n': self.__top_n
        }
        return self._collect_evaluation_results(
            x_transformed, communities_y_encoded, x_test_docs_transformed, y_test,
            results_table, classifiers, 'DOC2COM+', extra_details = extra_details)
347 |
class GraphOfDocsBigramsExtractionEvaluator(Evaluator):
    """Evaluates classifiers on bigram features read from the graph database."""

    def __init__(self, top_n = None, min_weight = None,
                 test_size = 0.33, random_state = 42):
        Evaluator.__init__(self, test_size, random_state)
        # top_n takes precedence over min_weight when both are given.
        self.__top_n = top_n
        self.__min_weight = min_weight

    def __generate_bigram_features(self, document_bigrams):
        """Filter a document's bigrams and turn each into a 'word1-word2' string.

        With ``top_n`` set, the first ``top_n`` entries are kept; otherwise,
        with ``min_weight`` set, entries whose element at index 3 is at least
        ``min_weight`` are kept; with neither set, all entries are kept.
        """
        if self.__top_n is not None:
            document_bigrams = document_bigrams[:self.__top_n]
        elif self.__min_weight is not None:
            document_bigrams = [bigram for bigram in document_bigrams
                                if bigram[3] >= self.__min_weight]
        return [bigram[0] + '-' + bigram[1] for bigram in document_bigrams]

    def __convert_documents_to_bigrams_dicts(self, database, document_ids):
        """Return one ``{bigram: 1}`` presence dict per document id."""
        bigrams_dicts = []
        for document_id in document_ids:
            bigrams = select.get_word_digrams_by_filename(database, document_id)[0][0]
            bigrams = self.__generate_bigram_features(bigrams)
            bigrams_dicts.append({bigram: 1 for bigram in bigrams})
        return bigrams_dicts

    def evaluate(self, x, y, **kwargs):
        """Split the document identifiers, build bigram features and benchmark.

        Expects in ``kwargs``: ``df`` (DataFrame with an 'identifier'
        column), ``database``, ``results_table`` and ``classifiers``. The
        ``x`` argument is not used; documents are looked up by identifier.

        Returns:
            The list produced by ``_collect_evaluation_results``; every
            result carries ``top_n`` and ``min_weight`` as extra details.
        """
        df = kwargs['df']
        database = kwargs['database']
        train_docs, test_docs, y_train, \
            y_test = train_test_split(
                df['identifier'], y, test_size = self._test_size,
                random_state = self._random_state)
        train_docs = list(train_docs)
        test_docs = list(test_docs)

        train_documents = self.__convert_documents_to_bigrams_dicts(database, train_docs)
        test_documents = self.__convert_documents_to_bigrams_dicts(database, test_docs)

        dict_vectorizer = DictVectorizer()
        train_transformed = dict_vectorizer.fit_transform(train_documents)
        test_transformed = dict_vectorizer.transform(test_documents)

        results_table = kwargs['results_table']
        classifiers = kwargs['classifiers']
        extra_details = {
            'top_n': self.__top_n,
            'min_weight': self.__min_weight
        }
        # Use the same ``is not None`` test as the feature filter so the
        # label always names the filter that was actually applied
        # (a truthiness test mislabels top_n == 0 as MIN_WEIGHT).
        prefix = 'TOP_N' if self.__top_n is not None else 'MIN_WEIGHT'
        return self._collect_evaluation_results(
            train_transformed, y_train, test_transformed,
            y_test, results_table, classifiers, f'GOD+BI {prefix}+',
            extra_details = extra_details)
402 |
--------------------------------------------------------------------------------
/GraphOfDocs/evaluation_results/lingspam/lingspam_results.txt:
--------------------------------------------------------------------------------
1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2 | !START OF THE EXPERIMENT!
3 | DATASET DIR PATH: C:\Users\USER\source\repos\GraphOfDocs\GraphOfDocs\datasets\lingspam
4 | MIN NUMBER OF DOCUMENTS PER SELECTED COMMUNITY: 2
5 | VARIANCE THRESHOLD: [0.0005, 0.001, 0.0015, 0.002, 0.003, 0.004, 0.005, 0.01]
6 | SELECT KBEST K: [1000, 2000, 3000, 4000, 5000, 6000, 7000, 10000, 14000]
7 | TOP N SELECTED COMMUNITY TERMS: [5, 10, 15, 20, 25, 50, 100, 250, 500]
8 | Number of selected documents: 816
9 | EXAMPLE OF THE PANDAS DATAFRAME
10 | identifier class class_number text
11 | 1530 msg_9-1191msg1.txt msg 0 workshop embodied conversational characters ca...
12 | 1769 msg_9-159msg2.txt msg 0 evaluation parsing systems evaluation parsing ...
13 | Number of unique classes: 2
14 | Number of features in BOWEvaluator:16695
15 | C:\Users\USER\source\repos\GraphOfDocs\GraphOfDocs\Virtual Environment\lib\site-packages\sklearn\svm\_base.py:947: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
16 | "the number of iterations.", ConvergenceWarning)
17 | EXAMPLE OF THE EVALUATION RESULTS PANDAS DATAFRAME
18 | Method Accuracy Number of features Train size Test size Classifier variance thershold kbest top_n
19 | 0 BOW+NB 0.9963 16695 546 270 NB NaN NaN NaN
20 | 1 BOW+LR 1.0000 16695 546 270 LR NaN NaN NaN
21 | +--------------------------+----------+--------------------+------------+-----------+--------------------------------+
22 | | Method | Accuracy | Number of features | Train size | Test size | Details |
23 | +--------------------------+----------+--------------------+------------+-----------+--------------------------------+
24 | | TOPN+NN100x50 | 1.0 | 8975 | 546 | 270 | {'top_n': 250} |
25 | | TOPN+NN100x50 | 1.0 | 2274 | 546 | 270 | {'top_n': 50} |
26 | | TOPN+NN100x50 | 1.0 | 995 | 546 | 270 | {'top_n': 25} |
27 | | TOPN+NN100x50 | 1.0 | 758 | 546 | 270 | {'top_n': 20} |
28 | | TOPN+LR | 1.0 | 12104 | 546 | 270 | {'top_n': 500} |
29 | | TOPN+LR | 1.0 | 8975 | 546 | 270 | {'top_n': 250} |
30 | | TOPN+LR | 1.0 | 4443 | 546 | 270 | {'top_n': 100} |
31 | | TOPN+LR | 1.0 | 2274 | 546 | 270 | {'top_n': 50} |
32 | | TOPN+LR | 1.0 | 995 | 546 | 270 | {'top_n': 25} |
33 | | TOPN+LR | 1.0 | 758 | 546 | 270 | {'top_n': 20} |
34 | | TOPN+LR | 1.0 | 526 | 546 | 270 | {'top_n': 15} |
35 | | TOPN+LR | 1.0 | 296 | 546 | 270 | {'top_n': 10} |
36 | | TOPN+LR | 1.0 | 120 | 546 | 270 | {'top_n': 5} |
37 | | META+NN100x50 | 1.0 | 2509 | 546 | 270 | {} |
38 | | META+LSVM | 1.0 | 2509 | 546 | 270 | {} |
39 | | META+LR | 1.0 | 2509 | 546 | 270 | {} |
40 | | LVAR+NN100x50 | 1.0 | 11058 | 546 | 270 | {'variance thershold': 0.003} |
41 | | LVAR+NN100x50 | 1.0 | 11058 | 546 | 270 | {'variance thershold': 0.002} |
42 | | LVAR+LSVM | 1.0 | 16695 | 546 | 270 | {'variance thershold': 0.001} |
43 | | LVAR+LSVM | 1.0 | 16695 | 546 | 270 | {'variance thershold': 0.0015} |
44 | | LVAR+LSVM | 1.0 | 16695 | 546 | 270 | {'variance thershold': 0.0005} |
45 | | LVAR+LR | 1.0 | 16695 | 546 | 270 | {'variance thershold': 0.001} |
46 | | LVAR+LR | 1.0 | 16695 | 546 | 270 | {'variance thershold': 0.0015} |
47 | | LVAR+LR | 1.0 | 16695 | 546 | 270 | {'variance thershold': 0.0005} |
48 | | LVAR+LR | 1.0 | 11058 | 546 | 270 | {'variance thershold': 0.003} |
49 | | LVAR+LR | 1.0 | 11058 | 546 | 270 | {'variance thershold': 0.002} |
50 | | LVAR+LR | 1.0 | 8234 | 546 | 270 | {'variance thershold': 0.005} |
51 | | LVAR+LR | 1.0 | 8234 | 546 | 270 | {'variance thershold': 0.004} |
52 | | KBEST+NN100x50 | 1.0 | 1000 | 546 | 270 | {'kbest': 1000} |
53 | | KBEST+LSVM | 1.0 | 1000 | 546 | 270 | {'kbest': 1000} |
54 | | KBEST+LR | 1.0 | 14000 | 546 | 270 | {'kbest': 14000} |
55 | | KBEST+LR | 1.0 | 10000 | 546 | 270 | {'kbest': 10000} |
56 | | KBEST+LR | 1.0 | 1000 | 546 | 270 | {'kbest': 1000} |
57 | | Graph-of-docs Classifier | 1.0 | N/A | N/A | 270 | |
58 | | BOW+LSVM | 1.0 | 16695 | 546 | 270 | {} |
59 | | BOW+LR | 1.0 | 16695 | 546 | 270 | {} |
60 | | TOPN+NN100x50 | 0.9963 | 12104 | 546 | 270 | {'top_n': 500} |
61 | | TOPN+NN100x50 | 0.9963 | 4443 | 546 | 270 | {'top_n': 100} |
62 | | TOPN+NN100x50 | 0.9963 | 120 | 546 | 270 | {'top_n': 5} |
63 | | TOPN+NB | 0.9963 | 12104 | 546 | 270 | {'top_n': 500} |
64 | | TOPN+NB | 0.9963 | 8975 | 546 | 270 | {'top_n': 250} |
65 | | TOPN+NB | 0.9963 | 4443 | 546 | 270 | {'top_n': 100} |
66 | | TOPN+NB | 0.9963 | 2274 | 546 | 270 | {'top_n': 50} |
67 | | TOPN+LSVM | 0.9963 | 12104 | 546 | 270 | {'top_n': 500} |
68 | | TOPN+LSVM | 0.9963 | 8975 | 546 | 270 | {'top_n': 250} |
69 | | TOPN+LSVM | 0.9963 | 4443 | 546 | 270 | {'top_n': 100} |
70 | | TOPN+LSVM | 0.9963 | 2274 | 546 | 270 | {'top_n': 50} |
71 | | TOPN+LSVM | 0.9963 | 995 | 546 | 270 | {'top_n': 25} |
72 | | TOPN+LSVM | 0.9963 | 758 | 546 | 270 | {'top_n': 20} |
73 | | TOPN+LSVM | 0.9963 | 526 | 546 | 270 | {'top_n': 15} |
74 | | TOPN+LSVM | 0.9963 | 296 | 546 | 270 | {'top_n': 10} |
75 | | TOPN+LSVM | 0.9963 | 120 | 546 | 270 | {'top_n': 5} |
76 | | META+NB | 0.9963 | 2509 | 546 | 270 | {} |
77 | | LVAR+NN100x50 | 0.9963 | 16695 | 546 | 270 | {'variance thershold': 0.001} |
78 | | LVAR+NN100x50 | 0.9963 | 16695 | 546 | 270 | {'variance thershold': 0.0015} |
79 | | LVAR+NN100x50 | 0.9963 | 16695 | 546 | 270 | {'variance thershold': 0.0005} |
80 | | LVAR+NN100x50 | 0.9963 | 8234 | 546 | 270 | {'variance thershold': 0.005} |
81 | | LVAR+NN100x50 | 0.9963 | 8234 | 546 | 270 | {'variance thershold': 0.004} |
82 | | LVAR+NN100x50 | 0.9963 | 5464 | 546 | 270 | {'variance thershold': 0.01} |
83 | | LVAR+NB | 0.9963 | 16695 | 546 | 270 | {'variance thershold': 0.001} |
84 | | LVAR+NB | 0.9963 | 16695 | 546 | 270 | {'variance thershold': 0.0015} |
85 | | LVAR+NB | 0.9963 | 16695 | 546 | 270 | {'variance thershold': 0.0005} |
86 | | LVAR+NB | 0.9963 | 11058 | 546 | 270 | {'variance thershold': 0.003} |
87 | | LVAR+NB | 0.9963 | 11058 | 546 | 270 | {'variance thershold': 0.002} |
88 | | LVAR+NB | 0.9963 | 8234 | 546 | 270 | {'variance thershold': 0.005} |
89 | | LVAR+NB | 0.9963 | 8234 | 546 | 270 | {'variance thershold': 0.004} |
90 | | LVAR+LSVM | 0.9963 | 11058 | 546 | 270 | {'variance thershold': 0.003} |
91 | | LVAR+LSVM | 0.9963 | 11058 | 546 | 270 | {'variance thershold': 0.002} |
92 | | LVAR+LSVM | 0.9963 | 8234 | 546 | 270 | {'variance thershold': 0.005} |
93 | | LVAR+LSVM | 0.9963 | 8234 | 546 | 270 | {'variance thershold': 0.004} |
94 | | LVAR+LSVM | 0.9963 | 5464 | 546 | 270 | {'variance thershold': 0.01} |
95 | | LVAR+LR | 0.9963 | 5464 | 546 | 270 | {'variance thershold': 0.01} |
96 | | KBEST+NN100x50 | 0.9963 | 14000 | 546 | 270 | {'kbest': 14000} |
97 | | KBEST+NN100x50 | 0.9963 | 10000 | 546 | 270 | {'kbest': 10000} |
98 | | KBEST+NN100x50 | 0.9963 | 2000 | 546 | 270 | {'kbest': 2000} |
99 | | KBEST+NB | 0.9963 | 14000 | 546 | 270 | {'kbest': 14000} |
100 | | KBEST+NB | 0.9963 | 10000 | 546 | 270 | {'kbest': 10000} |
101 | | KBEST+NB | 0.9963 | 7000 | 546 | 270 | {'kbest': 7000} |
102 | | KBEST+NB | 0.9963 | 6000 | 546 | 270 | {'kbest': 6000} |
103 | | KBEST+NB | 0.9963 | 5000 | 546 | 270 | {'kbest': 5000} |
104 | | KBEST+NB | 0.9963 | 4000 | 546 | 270 | {'kbest': 4000} |
105 | | KBEST+NB | 0.9963 | 3000 | 546 | 270 | {'kbest': 3000} |
106 | | KBEST+LSVM | 0.9963 | 14000 | 546 | 270 | {'kbest': 14000} |
107 | | KBEST+LSVM | 0.9963 | 10000 | 546 | 270 | {'kbest': 10000} |
108 | | KBEST+LSVM | 0.9963 | 7000 | 546 | 270 | {'kbest': 7000} |
109 | | KBEST+LSVM | 0.9963 | 6000 | 546 | 270 | {'kbest': 6000} |
110 | | KBEST+LSVM | 0.9963 | 5000 | 546 | 270 | {'kbest': 5000} |
111 | | KBEST+LSVM | 0.9963 | 4000 | 546 | 270 | {'kbest': 4000} |
112 | | KBEST+LSVM | 0.9963 | 3000 | 546 | 270 | {'kbest': 3000} |
113 | | KBEST+LSVM | 0.9963 | 2000 | 546 | 270 | {'kbest': 2000} |
114 | | KBEST+LR | 0.9963 | 7000 | 546 | 270 | {'kbest': 7000} |
115 | | KBEST+LR | 0.9963 | 6000 | 546 | 270 | {'kbest': 6000} |
116 | | KBEST+LR | 0.9963 | 5000 | 546 | 270 | {'kbest': 5000} |
117 | | KBEST+LR | 0.9963 | 4000 | 546 | 270 | {'kbest': 4000} |
118 | | KBEST+LR | 0.9963 | 3000 | 546 | 270 | {'kbest': 3000} |
119 | | KBEST+LR | 0.9963 | 2000 | 546 | 270 | {'kbest': 2000} |
120 | | BOW+NN100x50 | 0.9963 | 16695 | 546 | 270 | {} |
121 | | BOW+NB | 0.9963 | 16695 | 546 | 270 | {} |
122 | | TOPN+NN100x50 | 0.9926 | 526 | 546 | 270 | {'top_n': 15} |
123 | | TOPN+NN100x50 | 0.9926 | 296 | 546 | 270 | {'top_n': 10} |
124 | | TOPN+5NN | 0.9926 | 120 | 546 | 270 | {'top_n': 5} |
125 | | KBEST+NN100x50 | 0.9926 | 7000 | 546 | 270 | {'kbest': 7000} |
126 | | KBEST+NN100x50 | 0.9926 | 6000 | 546 | 270 | {'kbest': 6000} |
127 | | KBEST+NN100x50 | 0.9926 | 5000 | 546 | 270 | {'kbest': 5000} |
128 | | KBEST+NN100x50 | 0.9926 | 4000 | 546 | 270 | {'kbest': 4000} |
129 | | KBEST+NN100x50 | 0.9926 | 3000 | 546 | 270 | {'kbest': 3000} |
130 | | KBEST+1KNN | 0.9889 | 3000 | 546 | 270 | {'kbest': 3000} |
131 | | TOPN+2NN | 0.9852 | 120 | 546 | 270 | {'top_n': 5} |
132 | | TOPN+1KNN | 0.9852 | 120 | 546 | 270 | {'top_n': 5} |
133 | | KBEST+2NN | 0.9852 | 4000 | 546 | 270 | {'kbest': 4000} |
134 | | KBEST+1KNN | 0.9852 | 4000 | 546 | 270 | {'kbest': 4000} |
135 | | TOPN+NB | 0.9815 | 526 | 546 | 270 | {'top_n': 15} |
136 | | TOPN+NB | 0.9815 | 120 | 546 | 270 | {'top_n': 5} |
137 | | LVAR+NB | 0.9815 | 5464 | 546 | 270 | {'variance thershold': 0.01} |
138 | | KBEST+2NN | 0.9815 | 3000 | 546 | 270 | {'kbest': 3000} |
139 | | KBEST+2NN | 0.9815 | 1000 | 546 | 270 | {'kbest': 1000} |
140 | | KBEST+1KNN | 0.9815 | 1000 | 546 | 270 | {'kbest': 1000} |
141 | | TOPN+NB | 0.9778 | 995 | 546 | 270 | {'top_n': 25} |
142 | | TOPN+NB | 0.9778 | 758 | 546 | 270 | {'top_n': 20} |
143 | | TOPN+NB | 0.9778 | 296 | 546 | 270 | {'top_n': 10} |
144 | | KBEST+NB | 0.9778 | 2000 | 546 | 270 | {'kbest': 2000} |
145 | | KBEST+NB | 0.9778 | 1000 | 546 | 270 | {'kbest': 1000} |
146 | | KBEST+5NN | 0.9778 | 5000 | 546 | 270 | {'kbest': 5000} |
147 | | KBEST+5NN | 0.9741 | 2000 | 546 | 270 | {'kbest': 2000} |
148 | | KBEST+2NN | 0.9741 | 5000 | 546 | 270 | {'kbest': 5000} |
149 | | KBEST+2NN | 0.9741 | 2000 | 546 | 270 | {'kbest': 2000} |
150 | | KBEST+1KNN | 0.9741 | 5000 | 546 | 270 | {'kbest': 5000} |
151 | | KBEST+1KNN | 0.9741 | 2000 | 546 | 270 | {'kbest': 2000} |
152 | | TOPN+5NN | 0.9704 | 296 | 546 | 270 | {'top_n': 10} |
153 | | TOPN+2NN | 0.9667 | 526 | 546 | 270 | {'top_n': 15} |
154 | | TOPN+1KNN | 0.9667 | 526 | 546 | 270 | {'top_n': 15} |
155 | | KBEST+5NN | 0.9667 | 6000 | 546 | 270 | {'kbest': 6000} |
156 | | KBEST+5NN | 0.9667 | 3000 | 546 | 270 | {'kbest': 3000} |
157 | | KBEST+2NN | 0.9667 | 6000 | 546 | 270 | {'kbest': 6000} |
158 | | KBEST+1KNN | 0.9667 | 6000 | 546 | 270 | {'kbest': 6000} |
159 | | KBEST+5NN | 0.963 | 4000 | 546 | 270 | {'kbest': 4000} |
160 | | TOPN+1KNN | 0.9593 | 296 | 546 | 270 | {'top_n': 10} |
161 | | KBEST+5NN | 0.9593 | 1000 | 546 | 270 | {'kbest': 1000} |
162 | | TOPN+2NN | 0.9556 | 296 | 546 | 270 | {'top_n': 10} |
163 | | KBEST+2NN | 0.9556 | 7000 | 546 | 270 | {'kbest': 7000} |
164 | | KBEST+1KNN | 0.9556 | 7000 | 546 | 270 | {'kbest': 7000} |
165 | | TOPN+2NN | 0.9519 | 995 | 546 | 270 | {'top_n': 25} |
166 | | TOPN+2NN | 0.9519 | 758 | 546 | 270 | {'top_n': 20} |
167 | | TOPN+1KNN | 0.9519 | 995 | 546 | 270 | {'top_n': 25} |
168 | | TOPN+1KNN | 0.9519 | 758 | 546 | 270 | {'top_n': 20} |
169 | | TOPN+5NN | 0.9481 | 526 | 546 | 270 | {'top_n': 15} |
170 | | KBEST+5NN | 0.9481 | 7000 | 546 | 270 | {'kbest': 7000} |
171 | | TOPN+5NN | 0.9407 | 758 | 546 | 270 | {'top_n': 20} |
172 | | TOPN+2NN | 0.9333 | 4443 | 546 | 270 | {'top_n': 100} |
173 | | TOPN+2NN | 0.9333 | 2274 | 546 | 270 | {'top_n': 50} |
174 | | TOPN+1KNN | 0.9333 | 4443 | 546 | 270 | {'top_n': 100} |
175 | | TOPN+1KNN | 0.9333 | 2274 | 546 | 270 | {'top_n': 50} |
176 | | TOPN+5NN | 0.9259 | 995 | 546 | 270 | {'top_n': 25} |
177 | | TOPN+2NN | 0.9259 | 12104 | 546 | 270 | {'top_n': 500} |
178 | | TOPN+2NN | 0.9259 | 8975 | 546 | 270 | {'top_n': 250} |
179 | | TOPN+1KNN | 0.9259 | 12104 | 546 | 270 | {'top_n': 500} |
180 | | TOPN+1KNN | 0.9259 | 8975 | 546 | 270 | {'top_n': 250} |
181 | | META+2NN | 0.9222 | 2509 | 546 | 270 | {} |
182 | | META+1KNN | 0.9222 | 2509 | 546 | 270 | {} |
183 | | LVAR+2NN | 0.9222 | 5464 | 546 | 270 | {'variance thershold': 0.01} |
184 | | LVAR+1KNN | 0.9222 | 5464 | 546 | 270 | {'variance thershold': 0.01} |
185 | | KBEST+2NN | 0.9222 | 14000 | 546 | 270 | {'kbest': 14000} |
186 | | KBEST+2NN | 0.9222 | 10000 | 546 | 270 | {'kbest': 10000} |
187 | | KBEST+1KNN | 0.9222 | 14000 | 546 | 270 | {'kbest': 14000} |
188 | | KBEST+1KNN | 0.9222 | 10000 | 546 | 270 | {'kbest': 10000} |
189 | | LVAR+2NN | 0.9185 | 11058 | 546 | 270 | {'variance thershold': 0.003} |
190 | | LVAR+2NN | 0.9185 | 11058 | 546 | 270 | {'variance thershold': 0.002} |
191 | | LVAR+2NN | 0.9185 | 8234 | 546 | 270 | {'variance thershold': 0.005} |
192 | | LVAR+2NN | 0.9185 | 8234 | 546 | 270 | {'variance thershold': 0.004} |
193 | | LVAR+1KNN | 0.9185 | 11058 | 546 | 270 | {'variance thershold': 0.003} |
194 | | LVAR+1KNN | 0.9185 | 11058 | 546 | 270 | {'variance thershold': 0.002} |
195 | | LVAR+1KNN | 0.9185 | 8234 | 546 | 270 | {'variance thershold': 0.005} |
196 | | LVAR+1KNN | 0.9185 | 8234 | 546 | 270 | {'variance thershold': 0.004} |
197 | | LVAR+2NN | 0.9074 | 16695 | 546 | 270 | {'variance thershold': 0.001} |
198 | | LVAR+2NN | 0.9074 | 16695 | 546 | 270 | {'variance thershold': 0.0015} |
199 | | LVAR+2NN | 0.9074 | 16695 | 546 | 270 | {'variance thershold': 0.0005} |
200 | | LVAR+1KNN | 0.9074 | 16695 | 546 | 270 | {'variance thershold': 0.001} |
201 | | LVAR+1KNN | 0.9074 | 16695 | 546 | 270 | {'variance thershold': 0.0015} |
202 | | LVAR+1KNN | 0.9074 | 16695 | 546 | 270 | {'variance thershold': 0.0005} |
203 | | BOW+2NN | 0.9074 | 16695 | 546 | 270 | {} |
204 | | BOW+1KNN | 0.9074 | 16695 | 546 | 270 | {} |
205 | | TOPN+5NN | 0.8926 | 2274 | 546 | 270 | {'top_n': 50} |
206 | | LVAR+5NN | 0.8926 | 5464 | 546 | 270 | {'variance thershold': 0.01} |
207 | | KBEST+5NN | 0.8889 | 10000 | 546 | 270 | {'kbest': 10000} |
208 | | TOPN+5NN | 0.8778 | 4443 | 546 | 270 | {'top_n': 100} |
209 | | META+5NN | 0.8704 | 2509 | 546 | 270 | {} |
210 | | TOPN+5NN | 0.8481 | 8975 | 546 | 270 | {'top_n': 250} |
211 | | TOPN+5NN | 0.8407 | 12104 | 546 | 270 | {'top_n': 500} |
212 | | LVAR+5NN | 0.837 | 8234 | 546 | 270 | {'variance thershold': 0.005} |
213 | | LVAR+5NN | 0.837 | 8234 | 546 | 270 | {'variance thershold': 0.004} |
214 | | KBEST+5NN | 0.837 | 14000 | 546 | 270 | {'kbest': 14000} |
215 | | LVAR+5NN | 0.8333 | 16695 | 546 | 270 | {'variance thershold': 0.001} |
216 | | LVAR+5NN | 0.8333 | 16695 | 546 | 270 | {'variance thershold': 0.0015} |
217 | | LVAR+5NN | 0.8333 | 16695 | 546 | 270 | {'variance thershold': 0.0005} |
218 | | BOW+5NN | 0.8333 | 16695 | 546 | 270 | {} |
219 | | LVAR+5NN | 0.8296 | 11058 | 546 | 270 | {'variance thershold': 0.003} |
220 | | LVAR+5NN | 0.8296 | 11058 | 546 | 270 | {'variance thershold': 0.002} |
221 | +--------------------------+----------+--------------------+------------+-----------+--------------------------------+
222 | Execution time: 263.13525189999996
223 | !END OF THE EXPERIMENT!
224 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
--------------------------------------------------------------------------------