├── GenerateBIB
    └── referencesTest_2020-05-19_18-50-59.bib
├── GenerateReports
    └── referencesTest_Report_2020-05-20_01-23-11.html
├── GoogleColab_GRB.ipynb
├── OriginalBIB
    └── referencesTest.bib
├── README.md
├── find_errors_bib.py
├── grb.py
├── nltk_config.py
├── requirements.txt
└── screenshots
    ├── bibNotGenerate.PNG
    ├── reportCongrats.PNG
    ├── reportErrorComLine.PNG
    ├── reportErrorOriginalBIB.PNG
    └── reportWarning.PNG


/GenerateBIB/referencesTest_2020-05-19_18-50-59.bib:
--------------------------------------------------------------------------------
  1 | @book{LabelDaCitacao,
  2 |     author = "Wolf, W.",
  3 |     title = "Computers as components: principles of embedded computing system design",
  4 |     year = "2001",
  5 |     address = "New York, EUA",
  6 |     publisher = "MISSING",
  7 |     numpages = "MISSING"
  8 | }
  9 | 
 10 | @article{boscarioli20172,
 11 |     author = "Boscarioli, C and Ara{\'u}jo, RM and Maciel, RSP",
 12 |     title = "I GranDSI-BR--Grand Research Challenges in Information Systems in Brazil 2016-2026",
 13 |     journal = "CE-SI - SBC",
 14 |     year = "2017",
 15 |     volume = "MISSING",
 16 |     month = "MISSING",
 17 |     pages = "MISSING"
 18 | }
 19 | 
 20 | @inproceedings{LabelDaCitacao1,
 21 |     author = "Wolf, W.",
 22 |     title = "Computers as components: principles of embedded computing system design",
 23 |     publisher = "Morgan Kaufmann Publishers",
 24 |     year = "2001",
 25 |     address = "New York, EUA",
 26 |     booktitle = "MISSING",
 27 |     pages = "MISSING"
 28 | }
 29 | 
 30 | @proceedings{conf/aaai/2011,
 31 |     title = "25th International Conference on Artificial Intelligence",
 32 |     year = "2011",
 33 |     bibsource = "dblp computer science bibliography, http://dblp.org",
 34 |     biburl = "http://dblp.uni-trier.de/rec/bib/conf/aaai/2011",
 35 |     comment = "conf/aaai/2011",
 36 |     timestamp = "Tue, 09 Aug 2011 07:56:46 +0200",
 37 |     author = "MISSING",
 38 |     booktitle = "MISSING",
 39 |     pages = "MISSING"
 40 | }
 41 | 
 42 | @incollection{simon1983should,
 43 |     author = "Simon, Herbert A",
 44 |     title = "Why should machines learn?",
 45 |     booktitle = "Machine learning",
 46 |     pages = "25--37",
 47 |     year = "1983",
 48 |     publisher = "Elsevier"
 49 | }
 50 | 
 51 | @techreport{Kitchenham2007,
 52 |     author = "Kitchenham, Barbara and Charters, Stuart",
 53 |     title = "Guidelines for performing systematic literature reviews in software engineering",
 54 |     NUMBER = "EBSE-2007-01",
 55 |     INSTITUTION = "Department of Computer Science, University of Durham",
 56 |     ADDRESS = "Durham, UK",
 57 |     year = "2007",
 58 |     publisher = "Citeseer",
 59 |     numpages = "MISSING"
 60 | }
 61 | 
 62 | @mastersthesis{Stenersen2015Guidance,
 63 |     author = "Stenersen, Thomas",
 64 |     title = "Guidance system for autonomous surface vehicles",
 65 |     year = "2015",
 66 |     school = "NTNU",
 67 |     numpages = "MISSING"
 68 | }
 69 | 
 70 | @phdthesis{forster2018hive,
 71 |     author = "Forster, Rodrigo Richard",
 72 |     title = "Hive on {Spark and MapReduce: A} methodology for parameter tuning",
 73 |     school = "NOVA Information Management School",
 74 |     numpages = "54",
 75 |     type = "Master Thesis",
 76 |     year = "2018"
 77 | }
 78 | 
 79 | @inbook{Jovanovic2016,
 80 |     author = "Jovanovic, Petar and Romero, Oscar and Abell{\'o}, Alberto",
 81 |     title = "A unified view of data-intensive flows in business intelligence systems: A survey",
 82 |     bookTitle = "Transactions on Large-Scale Data- and Knowledge-Centered Systems XXIX",
 83 |     publisher = "Springer",
 84 |     address = "Berlin, DE",
 85 |     pages = "66--107",
 86 |     year = "2016",
 87 |     chapter = "MISSING"
 88 | }
 89 | 
 90 | @booklet{booklet,
 91 |     author = "Caxton, Peter",
 92 |     title = "The title of the work",
 93 |     howpublished = "How it was published",
 94 |     address = "The address of the publisher",
 95 |     month = "7",
 96 |     note = "An optional note",
 97 |     numpages = "MISSING",
 98 |     year = "MISSING"
 99 | }
100 | 
101 | @misc{dubes1988,
102 |     author = "Dubes, Richard C and Jain, Anil K",
103 |     title = "Algorithms for clustering data",
104 |     year = "1988",
105 |     publisher = "Prentice hall Englewood Cliffs",
106 |     url = "MISSING",
107 |     urlaccessdate = "MISSING"
108 | }
109 | 


--------------------------------------------------------------------------------
/GoogleColab_GRB.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "metadata": {
  6 |         "id": "view-in-github",
  7 |         "colab_type": "text"
  8 |       },
  9 |       "source": [
 10 |         "<a href=\"https://colab.research.google.com/github/ppgcc/GenerateReportBib/blob/master/GoogleColab_GRB.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 11 |       ]
 12 |     },
 13 |     {
 14 |       "cell_type": "markdown",
 15 |       "metadata": {
 16 |         "id": "_0YxKxojEOJ5",
 17 |         "colab_type": "text"
 18 |       },
 19 |       "source": [
 20 |         "# ***GenerateReportBib (GRB)***\n",
 21 |         "- Por _Olimar Teixeira Borges_"
 22 |       ]
 23 |     },
 24 |     {
 25 |       "cell_type": "markdown",
 26 |       "metadata": {
 27 |         "id": "ya8_AH1KDkd-",
 28 |         "colab_type": "text"
 29 |       },
 30 |       "source": [
 31 |         "## - **ATENÇÃO:** Antes de iniciar a execução deste arquivo, é aconselhado ler a documentação completa ([README.md](https://github.com/olimarborges/GenerateReportBib/blob/master/README.md)) do repositório original. Na documentação estão os detalhes para entender como os resultados deste script devem ser interpretados.\n",
 32 |         "- Se preferir, você pode ir direto para a leitura dos tópicos: \n",
 33 |         "  - [Entendendo o GRB](https://github.com/olimarborges/GenerateReportBib/blob/master/README.md#entendendo-o-GRB)\n",
 34 |         "  - [Utilizando o GRB](https://github.com/olimarborges/GenerateReportBib/blob/master/README.md#utilizando-o-GRB)\n",
 35 |         "## - Assistam também ao vídeo no [YouTube](https://www.youtube.com/watch?v=qU_Uc38Y7pU&feature=youtu.be), onde explico como executar o Script no Google Colab passo-a-passo.\n",
 36 |         "## - Aproveitem o script!"
 37 |       ]
 38 |     },
 39 |     {
 40 |       "cell_type": "markdown",
 41 |       "metadata": {
 42 |         "id": "t4sEz4-CA027",
 43 |         "colab_type": "text"
 44 |       },
 45 |       "source": [
 46 |         "## Realize a cópia (clone) do Repositório Original do GitHub do GRB"
 47 |       ]
 48 |     },
 49 |     {
 50 |       "cell_type": "code",
 51 |       "metadata": {
 52 |         "id": "0mkiJ9oKUhQN",
 53 |         "colab_type": "code",
 54 |         "outputId": "6c5e92ad-644f-4ee8-a3c0-49572dd6446b",
 55 |         "colab": {
 56 |           "base_uri": "https://localhost:8080/",
 57 |           "height": 119
 58 |         }
 59 |       },
 60 |       "source": [
 61 |         "!git clone https://github.com/ppgcc/GenerateReportBib.git"
 62 |       ],
 63 |       "execution_count": 0,
 64 |       "outputs": [
 65 |         {
 66 |           "output_type": "stream",
 67 |           "text": [
 68 |             "Cloning into 'GenerateReportBib'...\n",
 69 |             "remote: Enumerating objects: 81, done.\u001b[K\n",
 70 |             "remote: Counting objects:   1% (1/81)\u001b[K\rremote: Counting objects:   2% (2/81)\u001b[K\rremote: Counting objects:   3% (3/81)\u001b[K\rremote: Counting objects:   4% (4/81)\u001b[K\rremote: Counting objects:   6% (5/81)\u001b[K\rremote: Counting objects:   7% (6/81)\u001b[K\rremote: Counting objects:   8% (7/81)\u001b[K\rremote: Counting objects:   9% (8/81)\u001b[K\rremote: Counting objects:  11% (9/81)\u001b[K\rremote: Counting objects:  12% (10/81)\u001b[K\rremote: Counting objects:  13% (11/81)\u001b[K\rremote: Counting objects:  14% (12/81)\u001b[K\rremote: Counting objects:  16% (13/81)\u001b[K\rremote: Counting objects:  17% (14/81)\u001b[K\rremote: Counting objects:  18% (15/81)\u001b[K\rremote: Counting objects:  19% (16/81)\u001b[K\rremote: Counting objects:  20% (17/81)\u001b[K\rremote: Counting objects:  22% (18/81)\u001b[K\rremote: Counting objects:  23% (19/81)\u001b[K\rremote: Counting objects:  24% (20/81)\u001b[K\rremote: Counting objects:  25% (21/81)\u001b[K\rremote: Counting objects:  27% (22/81)\u001b[K\rremote: Counting objects:  28% (23/81)\u001b[K\rremote: Counting objects:  29% (24/81)\u001b[K\rremote: Counting objects:  30% (25/81)\u001b[K\rremote: Counting objects:  32% (26/81)\u001b[K\rremote: Counting objects:  33% (27/81)\u001b[K\rremote: Counting objects:  34% (28/81)\u001b[K\rremote: Counting objects:  35% (29/81)\u001b[K\rremote: Counting objects:  37% (30/81)\u001b[K\rremote: Counting objects:  38% (31/81)\u001b[K\rremote: Counting objects:  39% (32/81)\u001b[K\rremote: Counting objects:  40% (33/81)\u001b[K\rremote: Counting objects:  41% (34/81)\u001b[K\rremote: Counting objects:  43% (35/81)\u001b[K\rremote: Counting objects:  44% (36/81)\u001b[K\rremote: Counting objects:  45% (37/81)\u001b[K\rremote: Counting objects:  46% (38/81)\u001b[K\rremote: Counting objects:  48% (39/81)\u001b[K\rremote: Counting objects:  49% (40/81)\u001b[K\rremote: Counting objects:  50% (41/81)\u001b[K\rremote: Counting 
objects:  51% (42/81)\u001b[K\rremote: Counting objects:  53% (43/81)\u001b[K\rremote: Counting objects:  54% (44/81)\u001b[K\rremote: Counting objects:  55% (45/81)\u001b[K\rremote: Counting objects:  56% (46/81)\u001b[K\rremote: Counting objects:  58% (47/81)\u001b[K\rremote: Counting objects:  59% (48/81)\u001b[K\rremote: Counting objects:  60% (49/81)\u001b[K\rremote: Counting objects:  61% (50/81)\u001b[K\rremote: Counting objects:  62% (51/81)\u001b[K\rremote: Counting objects:  64% (52/81)\u001b[K\rremote: Counting objects:  65% (53/81)\u001b[K\rremote: Counting objects:  66% (54/81)\u001b[K\rremote: Counting objects:  67% (55/81)\u001b[K\rremote: Counting objects:  69% (56/81)\u001b[K\rremote: Counting objects:  70% (57/81)\u001b[K\rremote: Counting objects:  71% (58/81)\u001b[K\rremote: Counting objects:  72% (59/81)\u001b[K\rremote: Counting objects:  74% (60/81)\u001b[K\rremote: Counting objects:  75% (61/81)\u001b[K\rremote: Counting objects:  76% (62/81)\u001b[K\rremote: Counting objects:  77% (63/81)\u001b[K\rremote: Counting objects:  79% (64/81)\u001b[K\rremote: Counting objects:  80% (65/81)\u001b[K\rremote: Counting objects:  81% (66/81)\u001b[K\rremote: Counting objects:  82% (67/81)\u001b[K\rremote: Counting objects:  83% (68/81)\u001b[K\rremote: Counting objects:  85% (69/81)\u001b[K\rremote: Counting objects:  86% (70/81)\u001b[K\rremote: Counting objects:  87% (71/81)\u001b[K\rremote: Counting objects:  88% (72/81)\u001b[K\rremote: Counting objects:  90% (73/81)\u001b[K\rremote: Counting objects:  91% (74/81)\u001b[K\rremote: Counting objects:  92% (75/81)\u001b[K\rremote: Counting objects:  93% (76/81)\u001b[K\rremote: Counting objects:  95% (77/81)\u001b[K\rremote: Counting objects:  96% (78/81)\u001b[K\rremote: Counting objects:  97% (79/81)\u001b[K\rremote: Counting objects:  98% (80/81)\u001b[K\rremote: Counting objects: 100% (81/81)\u001b[K\rremote: Counting objects: 100% (81/81), done.\u001b[K\n",
 71 |             "remote: Compressing objects:   1% (1/56)\u001b[K\rremote: Compressing objects:   3% (2/56)\u001b[K\rremote: Compressing objects:   5% (3/56)\u001b[K\rremote: Compressing objects:   7% (4/56)\u001b[K\rremote: Compressing objects:   8% (5/56)\u001b[K\rremote: Compressing objects:  10% (6/56)\u001b[K\rremote: Compressing objects:  12% (7/56)\u001b[K\rremote: Compressing objects:  14% (8/56)\u001b[K\rremote: Compressing objects:  16% (9/56)\u001b[K\rremote: Compressing objects:  17% (10/56)\u001b[K\rremote: Compressing objects:  19% (11/56)\u001b[K\rremote: Compressing objects:  21% (12/56)\u001b[K\rremote: Compressing objects:  23% (13/56)\u001b[K\rremote: Compressing objects:  25% (14/56)\u001b[K\rremote: Compressing objects:  26% (15/56)\u001b[K\rremote: Compressing objects:  28% (16/56)\u001b[K\rremote: Compressing objects:  30% (17/56)\u001b[K\rremote: Compressing objects:  32% (18/56)\u001b[K\rremote: Compressing objects:  33% (19/56)\u001b[K\rremote: Compressing objects:  35% (20/56)\u001b[K\rremote: Compressing objects:  37% (21/56)\u001b[K\rremote: Compressing objects:  39% (22/56)\u001b[K\rremote: Compressing objects:  41% (23/56)\u001b[K\rremote: Compressing objects:  42% (24/56)\u001b[K\rremote: Compressing objects:  44% (25/56)\u001b[K\rremote: Compressing objects:  46% (26/56)\u001b[K\rremote: Compressing objects:  48% (27/56)\u001b[K\rremote: Compressing objects:  50% (28/56)\u001b[K\rremote: Compressing objects:  51% (29/56)\u001b[K\rremote: Compressing objects:  53% (30/56)\u001b[K\rremote: Compressing objects:  55% (31/56)\u001b[K\rremote: Compressing objects:  57% (32/56)\u001b[K\rremote: Compressing objects:  58% (33/56)\u001b[K\rremote: Compressing objects:  60% (34/56)\u001b[K\rremote: Compressing objects:  62% (35/56)\u001b[K\rremote: Compressing objects:  64% (36/56)\u001b[K\rremote: Compressing objects:  66% (37/56)\u001b[K\rremote: Compressing objects:  67% (38/56)\u001b[K\rremote: Compressing objects:  69% 
(39/56)\u001b[K\rremote: Compressing objects:  71% (40/56)\u001b[K\rremote: Compressing objects:  73% (41/56)\u001b[K\rremote: Compressing objects:  75% (42/56)\u001b[K\rremote: Compressing objects:  76% (43/56)\u001b[K\rremote: Compressing objects:  78% (44/56)\u001b[K\rremote: Compressing objects:  80% (45/56)\u001b[K\rremote: Compressing objects:  82% (46/56)\u001b[K\rremote: Compressing objects:  83% (47/56)\u001b[K\rremote: Compressing objects:  85% (48/56)\u001b[K\rremote: Compressing objects:  87% (49/56)\u001b[K\rremote: Compressing objects:  89% (50/56)\u001b[K\rremote: Compressing objects:  91% (51/56)\u001b[K\rremote: Compressing objects:  92% (52/56)\u001b[K\rremote: Compressing objects:  94% (53/56)\u001b[K\rremote: Compressing objects:  96% (54/56)\u001b[K\rremote: Compressing objects:  98% (55/56)\u001b[K\rremote: Compressing objects: 100% (56/56)\u001b[K\rremote: Compressing objects: 100% (56/56), done.\u001b[K\n",
 72 |             "Unpacking objects:   1% (1/81)   \rUnpacking objects:   2% (2/81)   \rUnpacking objects:   3% (3/81)   \rUnpacking objects:   4% (4/81)   \rUnpacking objects:   6% (5/81)   \rUnpacking objects:   7% (6/81)   \rUnpacking objects:   8% (7/81)   \rUnpacking objects:   9% (8/81)   \rUnpacking objects:  11% (9/81)   \rUnpacking objects:  12% (10/81)   \rUnpacking objects:  13% (11/81)   \rUnpacking objects:  14% (12/81)   \rUnpacking objects:  16% (13/81)   \rUnpacking objects:  17% (14/81)   \rUnpacking objects:  18% (15/81)   \rUnpacking objects:  19% (16/81)   \rUnpacking objects:  20% (17/81)   \rUnpacking objects:  22% (18/81)   \rUnpacking objects:  23% (19/81)   \rUnpacking objects:  24% (20/81)   \rUnpacking objects:  25% (21/81)   \rUnpacking objects:  27% (22/81)   \rUnpacking objects:  28% (23/81)   \rUnpacking objects:  29% (24/81)   \rUnpacking objects:  30% (25/81)   \rUnpacking objects:  32% (26/81)   \rUnpacking objects:  33% (27/81)   \rUnpacking objects:  34% (28/81)   \rUnpacking objects:  35% (29/81)   \rUnpacking objects:  37% (30/81)   \rUnpacking objects:  38% (31/81)   \rUnpacking objects:  39% (32/81)   \rUnpacking objects:  40% (33/81)   \rUnpacking objects:  41% (34/81)   \rUnpacking objects:  43% (35/81)   \rUnpacking objects:  44% (36/81)   \rUnpacking objects:  45% (37/81)   \rUnpacking objects:  46% (38/81)   \rremote: Total 81 (delta 33), reused 64 (delta 19), pack-reused 0\u001b[K\n",
 73 |             "Unpacking objects:  48% (39/81)   \rUnpacking objects:  49% (40/81)   \rUnpacking objects:  50% (41/81)   \rUnpacking objects:  51% (42/81)   \rUnpacking objects:  53% (43/81)   \rUnpacking objects:  54% (44/81)   \rUnpacking objects:  55% (45/81)   \rUnpacking objects:  56% (46/81)   \rUnpacking objects:  58% (47/81)   \rUnpacking objects:  59% (48/81)   \rUnpacking objects:  60% (49/81)   \rUnpacking objects:  61% (50/81)   \rUnpacking objects:  62% (51/81)   \rUnpacking objects:  64% (52/81)   \rUnpacking objects:  65% (53/81)   \rUnpacking objects:  66% (54/81)   \rUnpacking objects:  67% (55/81)   \rUnpacking objects:  69% (56/81)   \rUnpacking objects:  70% (57/81)   \rUnpacking objects:  71% (58/81)   \rUnpacking objects:  72% (59/81)   \rUnpacking objects:  74% (60/81)   \rUnpacking objects:  75% (61/81)   \rUnpacking objects:  76% (62/81)   \rUnpacking objects:  77% (63/81)   \rUnpacking objects:  79% (64/81)   \rUnpacking objects:  80% (65/81)   \rUnpacking objects:  81% (66/81)   \rUnpacking objects:  82% (67/81)   \rUnpacking objects:  83% (68/81)   \rUnpacking objects:  85% (69/81)   \rUnpacking objects:  86% (70/81)   \rUnpacking objects:  87% (71/81)   \rUnpacking objects:  88% (72/81)   \rUnpacking objects:  90% (73/81)   \rUnpacking objects:  91% (74/81)   \rUnpacking objects:  92% (75/81)   \rUnpacking objects:  93% (76/81)   \rUnpacking objects:  95% (77/81)   \rUnpacking objects:  96% (78/81)   \rUnpacking objects:  97% (79/81)   \rUnpacking objects:  98% (80/81)   \rUnpacking objects: 100% (81/81)   \rUnpacking objects: 100% (81/81), done.\n"
 74 |           ],
 75 |           "name": "stdout"
 76 |         }
 77 |       ]
 78 |     },
 79 |     {
 80 |       "cell_type": "markdown",
 81 |       "metadata": {
 82 |         "id": "iBDB2oyTWtBO",
 83 |         "colab_type": "text"
 84 |       },
 85 |       "source": [
 86 |         "## Estrutura de pastas do Drive para encontrar os arquivos"
 87 |       ]
 88 |     },
 89 |     {
 90 |       "cell_type": "markdown",
 91 |       "metadata": {
 92 |         "id": "ioN-5g1vBZW5",
 93 |         "colab_type": "text"
 94 |       },
 95 |       "source": [
 96 |         "### Verifique se a pasta GenerateReportBib foi 'montada' aqui\n",
 97 |         "- Basta a pasta GenerateReportBib aparecer listada após a execução do 'ls'\n",
 98 |         "- Ou selecione o símbolo de 'pasta' com o label *Files*, que se encontra no lado esquerdo deste script. Após selecionar este botão, será apresentada a estrutura de pastas."
 99 |       ]
100 |     },
101 |     {
102 |       "cell_type": "code",
103 |       "metadata": {
104 |         "id": "1K0ZDLBzxib3",
105 |         "colab_type": "code",
106 |         "outputId": "e268306a-e59f-4fa3-8428-6c8211421797",
107 |         "colab": {
108 |           "base_uri": "https://localhost:8080/",
109 |           "height": 34
110 |         }
111 |       },
112 |       "source": [
113 |         "!ls"
114 |       ],
115 |       "execution_count": 0,
116 |       "outputs": [
117 |         {
118 |           "output_type": "stream",
119 |           "text": [
120 |             "drive  GenerateReportBib  sample_data\n"
121 |           ],
122 |           "name": "stdout"
123 |         }
124 |       ]
125 |     },
126 |     {
127 |       "cell_type": "markdown",
128 |       "metadata": {
129 |         "id": "SZHQF9rABO6a",
130 |         "colab_type": "text"
131 |       },
132 |       "source": [
133 |         "### Acesse a pasta do Repositório onde o projeto está localizado"
134 |       ]
135 |     },
136 |     {
137 |       "cell_type": "code",
138 |       "metadata": {
139 |         "id": "kqb7SjubBUfv",
140 |         "colab_type": "code",
141 |         "colab": {}
142 |       },
143 |       "source": [
144 |         "%cd GenerateReportBib"
145 |       ],
146 |       "execution_count": 0,
147 |       "outputs": []
148 |     },
149 |     {
150 |       "cell_type": "markdown",
151 |       "metadata": {
152 |         "id": "l2htnJ3aCHNZ",
153 |         "colab_type": "text"
154 |       },
155 |       "source": [
156 |         "### Verifique se você está dentro da pasta GenerateReportBib\n",
157 |         "- Basta verificar se a estrutura de pastas e arquivos do projeto será listada após a execução do \"ls\""
158 |       ]
159 |     },
160 |     {
161 |       "cell_type": "code",
162 |       "metadata": {
163 |         "id": "TtOO0jpkBpaf",
164 |         "colab_type": "code",
165 |         "outputId": "15432d92-5394-41e1-f20f-8ee09b42079f",
166 |         "colab": {
167 |           "base_uri": "https://localhost:8080/",
168 |           "height": 51
169 |         }
170 |       },
171 |       "source": [
172 |         "!ls"
173 |       ],
174 |       "execution_count": 0,
175 |       "outputs": [
176 |         {
177 |           "output_type": "stream",
178 |           "text": [
179 |             "GenerateBIB\t grb.py\t\t OriginalBIB  requirements.txt\n",
180 |             "GenerateReports  nltk_config.py  README.md    screenshots\n"
181 |           ],
182 |           "name": "stdout"
183 |         }
184 |       ]
185 |     },
186 |     {
187 |       "cell_type": "markdown",
188 |       "metadata": {
189 |         "id": "dvJRgjrhWmnt",
190 |         "colab_type": "text"
191 |       },
192 |       "source": [
193 |         "## Dependências do Projeto"
194 |       ]
195 |     },
196 |     {
197 |       "cell_type": "markdown",
198 |       "metadata": {
199 |         "id": "h3hE-C9QCfoD",
200 |         "colab_type": "text"
201 |       },
202 |       "source": [
203 |         "### Instale as dependências"
204 |       ]
205 |     },
206 |     {
207 |       "cell_type": "code",
208 |       "metadata": {
209 |         "id": "Z9cjb9OhWjLx",
210 |         "colab_type": "code",
211 |         "colab": {}
212 |       },
213 |       "source": [
214 |         "!pip install pathlib\n",
215 |         "!pip install grip\n",
216 |         "!pip install markdown2\n",
217 |         "!pip install click\n",
218 |         "!pip install wkhtmltopdf\n",
219 |         "!pip install langdetect\n",
220 |         "!pip install pybtex"
221 |       ],
222 |       "execution_count": 0,
223 |       "outputs": []
224 |     },
225 |     {
226 |       "cell_type": "markdown",
227 |       "metadata": {
228 |         "id": "V3d8Q2yACnCX",
229 |         "colab_type": "text"
230 |       },
231 |       "source": [
232 |         "### Stopwords do NLTK\n",
233 |         "- Instale as configurações e corpus das stopwords do NLTK de forma separada das listadas acima"
234 |       ]
235 |     },
236 |     {
237 |       "cell_type": "code",
238 |       "metadata": {
239 |         "id": "Zd1i3UJi_H1E",
240 |         "colab_type": "code",
241 |         "outputId": "a9eee4a6-4ea4-42e7-efec-06493273b5e3",
242 |         "colab": {
243 |           "base_uri": "https://localhost:8080/",
244 |           "height": 102
245 |         }
246 |       },
247 |       "source": [
248 |         "!pip install nltk\n",
249 |         "import nltk\n",
250 |         "from nltk.corpus import stopwords\n",
251 |         "nltk.download('stopwords')"
252 |       ],
253 |       "execution_count": 0,
254 |       "outputs": [
255 |         {
256 |           "output_type": "stream",
257 |           "text": [
258 |             "Requirement already satisfied: nltk in /usr/local/lib/python3.6/dist-packages (3.2.5)\n",
259 |             "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from nltk) (1.12.0)\n",
260 |             "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
261 |             "[nltk_data]   Unzipping corpora/stopwords.zip.\n"
262 |           ],
263 |           "name": "stdout"
264 |         },
265 |         {
266 |           "output_type": "execute_result",
267 |           "data": {
268 |             "text/plain": [
269 |               "True"
270 |             ]
271 |           },
272 |           "metadata": {
273 |             "tags": []
274 |           },
275 |           "execution_count": 28
276 |         }
277 |       ]
278 |     },
279 |     {
280 |       "cell_type": "markdown",
281 |       "metadata": {
282 |         "id": "K7-1saQ9W2D2",
283 |         "colab_type": "text"
284 |       },
285 |       "source": [
286 |         "## Executar o Script grb.py"
287 |       ]
288 |     },
289 |     {
290 |       "cell_type": "markdown",
291 |       "metadata": {
292 |         "id": "7jYwXaZBHNqf",
293 |         "colab_type": "text"
294 |       },
295 |       "source": [
296 |         "### Primeiro, faça o upload do seu `arquivo.bib` para dentro da pasta `OriginalBIB`\n",
297 |         "- Na estrutura de pastas localizada à esquerda deste script, ao passar o cursor do mouse sobre a pasta `OriginalBIB`, aparecerá um botão com três pontos (...). Selecione-o e, em seguida, faça o upload do seu `arquivo.bib`."
298 |       ]
299 |     },
300 |     {
301 |       "cell_type": "markdown",
302 |       "metadata": {
303 |         "id": "uTWB65tYC-1B",
304 |         "colab_type": "text"
305 |       },
306 |       "source": [
307 |         "### Execute o script substituindo os valores `<parametro>` pelas suas informações:\n",
308 |         "\n",
309 |         "```bash\n",
310 |         "!python grb.py <arquivo.bib> -L <idioma> -T <tipo>\n",
311 |         "```\n",
312 |         "### Exemplo de execução:\n",
313 |         "```bash\n",
314 |         "!python grb.py referencesTest.bib -L en -T num\n",
315 |         "```"
316 |       ]
317 |     },
318 |     {
319 |       "cell_type": "code",
320 |       "metadata": {
321 |         "id": "Rof0-WoeW9TG",
322 |         "colab_type": "code",
323 |         "outputId": "ff0850b2-a87c-409e-cc33-2203e991922d",
324 |         "colab": {
325 |           "base_uri": "https://localhost:8080/",
326 |           "height": 153
327 |         }
328 |       },
329 |       "source": [
330 |         "!python grb.py referencesTest.bib -L en -T num"
331 |       ],
332 |       "execution_count": 0,
333 |       "outputs": [
334 |         {
335 |           "output_type": "stream",
336 |           "text": [
337 |             "LANGUAGE:  en\n",
338 |             "TYPE_REFERENCES:  num\n",
339 |             "FILE_NAME:  referencesTest.bib\n",
340 |             "Exporting to referencesTest_Report_2020-05-25_18-04-54.html\n",
341 |             " * Downloading style https://github.githubassets.com/assets/frameworks-2ad7892273edb163fa6783a69e80cc48.css\n",
342 |             " * Downloading style https://github.githubassets.com/assets/site-c72d4cad0131ab28dbff38b082b939e1.css\n",
343 |             " * Downloading style https://github.githubassets.com/assets/github-c20dd055fa2771ecfba624428d9ca917.css\n",
344 |             " * Cached all downloads in /root/.grip/cache-4.5.2\n"
345 |           ],
346 |           "name": "stdout"
347 |         }
348 |       ]
349 |     },
350 |     {
351 |       "cell_type": "markdown",
352 |       "metadata": {
353 |         "id": "v2T0vlNPDB9p",
354 |         "colab_type": "text"
355 |       },
356 |       "source": [
357 |         "## Ao final desta execução, os relatórios serão gerados na pasta \"GenerateReports\" e os arquivos bib com as correções, na pasta \"GenerateBIB\".\n",
358 |         "- Faça os downloads dos respectivos arquivos para ter acesso aos relatórios e arquivos.bib gerados após a execução.\n",
359 |         "- Os horários que serão inseridos nos nomes dos arquivos, provavelmente não serão os atuais, pois o script pegará o horário do servidor em que ele estiver executando remotamente e pode não ser o mesmo que o seu. \n",
360 |         "- Após a execução do script, poderá demorar alguns segundos para que os arquivos (`report.html` e `novo_arquivo.bib`) apareçam nas pastas. Espere um pouco, feche e abra as pastas até aparecer.\n",
361 |         "### **- DICAS:**\n",
362 |         "1. Dependendo de como estiver o seu `arquivo.bib`, os primeiros relatórios servirão apenas para que você arrume alguns detalhes no seu `arquivo.bib` e execute novamente o `grb.py`.\n",
363 |         "2. Antes de iniciar as correções em seu `arquivo.bib`, corrija os avisos do tipo `Type not implemented`, deixando apenas os labels válidos e execute o grb.py novamente. Desta forma, o `novo_arquivo.bib` também será gerado e você poderá começar a usá-lo!\n",
364 |         "3. Depois que este `grb.py` gerar o seu primeiro `novo_arquivo.bib`, este novo arquivo já possuirá algumas correções, mas no relatório, ele listará todos os erros encontrados no arquivo original, mesmo que ele já tenha realizado algumas correções. A partir desse `novo_arquivo.bib` (com as correções realizadas pelo `grb.py`), passe ele uma segunda vez no `grb.py` como se fosse o seu arquivo .bib original, pois desta forma, grande parte dos itens listados no primeiro relatório como erro, não serão mais listados no próximo relatório. A partir desse momento, você pode iniciar o processo de correção das suas referências.\n",
365 |         "4. A cada nova iteração, execute o `grb.py` com o novo_arquivo.bib corrigido, para que mais uma vez seja gerado um novo relatório, até que o `grb.py` finalmente apresente o relatório final de \"PARA-BENS\".  "
366 |       ]
367 |     }
368 |   ],
369 |   "metadata": {
370 |     "colab": {
371 |       "name": "GenerateRefBib.ipynb",
372 |       "provenance": [],
373 |       "toc_visible": true,
374 |       "include_colab_link": true
375 |     },
376 |     "kernelspec": {
377 |       "name": "python3",
378 |       "display_name": "Python 3"
379 |     }
380 |   },
381 |   "nbformat": 4,
382 |   "nbformat_minor": 0
383 | }


--------------------------------------------------------------------------------
/OriginalBIB/referencesTest.bib:
--------------------------------------------------------------------------------
  1 | 
  2 | @book{LabelDaCitacao,
  3 | 	title={Computers as components: principles of embedded computing system design},
  4 | 	author={W. Wolf},
  5 | 	year={2001},
  6 | 	address={New York, EUA}
  7 | }
  8 | 
  9 | @inproceedings{boscariol,
 10 | 	title={I GranDSI-BR--Grand Research Challenges in Information Systems in Brazil 2016-2026},
 11 | 	author={Boscarioli, C and Ara{\'u}jo, RM and Maciel, RSP},
 12 | 	journal={CE-SI - SBC},
 13 | 	year={2017}
 14 | }
 15 | 
 16 | @article{boscarioli20172,
 17 | 	title={I GranDSI-BR--Grand Research Challenges in Information Systems in Brazil 2016-2026},
 18 | 	author={Boscarioli, C and Ara{\'u}jo, RM and Maciel, RSP},
 19 | 	journal={CE-SI - SBC \& Publishers},
 20 | 	year={2017}
 21 | }
 22 | 
 23 | @inproceedings{LabelDaCitacao1,
 24 | 	title={Computers as components: principles of embedded computing system design},
 25 | 	author={W. Wolf},
 26 | 	publisher={Morgan Kaufmann Publishers},
 27 | 	year={2001},
 28 | 	address={New York, EUA}
 29 | }
 30 | 
 31 | @proceedings{conf/aaai/2011,
 32 |   title     = {25th International Conference on Artificial Intelligence},
 33 |   year      = {2011},
 34 |   bibsource = {dblp computer science bibliography, http://dblp.org},
 35 |   biburl    = {http://dblp.uni-trier.de/rec/bib/conf/aaai/2011},
 36 |   comment   = {conf/aaai/2011},
 37 |   timestamp = {Tue, 09 Aug 2011 07:56:46 +0200},
 38 | }
 39 | 
 40 | @incollection{simon1983should,
 41 | 	title={Why should machines learn?},
 42 | 	author={Simon, Herbert A},
 43 | 	booktitle={Machine learning},
 44 | 	pages={25--37},
 45 | 	year={1983},
 46 | 	publisher={Elsevier}
 47 | }
 48 | 
 49 | @techreport{Kitchenham2007,
 50 | 	title={Guidelines for performing systematic literature reviews in software engineering},
 51 | 	author={Kitchenham, Barbara and Charters, Stuart},
 52 | 	NUMBER = {EBSE-2007-01},
 53 | 	INSTITUTION = {Department of Computer Science, University of Durham},
 54 | 	ADDRESS = {Durham, UK},
 55 | 	year={2007},
 56 | 	publisher={Citeseer}
 57 | }
 58 | 
 59 | @mastersthesis{Stenersen2015Guidance,
 60 | 	title={Guidance system for autonomous surface vehicles},
 61 | 	author={Stenersen, Thomas},
 62 | 	year={2015},
 63 | 	school={NTNU}
 64 | }
 65 | 
 66 | @phdthesis{forster2018hive,
 67 | 	author = {Forster, Rodrigo Richard},
 68 | 	title = {Hive on {Spark and MapReduce: A} methodology for parameter tuning},
 69 | 	school = {NOVA Information Management School},
 70 | 	numpages = {54},
 71 | 	type = {Master Thesis},
 72 | 	year = {2018},
 73 | }
 74 | 
 75 | @inbook{Jovanovic2016,
 76 | 	author = {Jovanovic, Petar
 77 | 	and Romero, Oscar
 78 | 	and Abell{\'o}, Alberto},
 79 | 	title = {A unified view of data-intensive flows in business intelligence systems: A survey},
 80 | 	bookTitle = {Transactions on Large-Scale Data- and Knowledge-Centered Systems XXIX},
 81 | 	publisher = {Springer},
 82 | 	address = {Berlin, DE},
 83 | 	pages = {66--107},
 84 | 	year = {2016},
 85 | }
 86 | 
 87 | @booklet{bookletTest,
 88 |   title        = {The title of the work},
 89 |   author       = {Peter Caxton},
 90 |   howpublished = {How it was published},
 91 |   address      = {The address of the publisher},
 92 |   month        = {Janeiro},
 93 |   note         = {An optional note}
 94 | }
 95 | 
 96 | @misc{dubes1988,
 97 | 	title={Algorithms for clustering data},
 98 | 	author={Dubes, Richard C and Jain, Anil K},
 99 | 	year={1988},
100 | 	publisher={Prentice hall Englewood Cliffs},
101 | }
102 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ### Assistam ao vídeo no YouTube, onde explico como executar o Script no Google Colab passo-a-passo. 
  2 | #### E não esqueça de dar aquele like e ativar o sininho! (Zueria né! ;D)
  3 | ### Aproveitem o Script!
  4 | [![](https://img.shields.io/static/v1?label=YouTube&message=Acesse%20o%20V%C3%ADdeo&color=red)](https://www.youtube.com/watch?v=qU_Uc38Y7pU&feature=youtu.be)
  5 | 
  6 | ### Usem o arquivo no Colab para executar o script online:
  7 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ppgcc/GenerateReportBib/blob/master/GoogleColab_GRB.ipynb)
  8 | 
  9 | # Generate Report Bib (GRB.py)
 10 | 
 11 | Este projeto é uma iniciativa do aluno de Doutorado **[Olimar Teixeira Borges](https://github.com/olimarborges)**, a partir do código base (arquivo `find_errors_bib.py` listado neste repositório) do Alumni de Mestrado **[Pedro Ballester](https://github.com/ballester)**. Tendo como objetivo auxiliar na padronização das Referências Bibliográficas utilizadas nas Dissertações de Mestrado e Teses de Doutorado do PPGCC.
 12 | 
 13 | No entanto, após o uso deste projeto, não significará uma garantia de que seu `arquivo.bib` está totalmente correto. Não isentando o(a) pesquisador(a) de verificar a adequação de suas referências com a padronização seguida pelo PPGCC/PUCRS. Mas ele servirá como uma forma de auxílio durante o processo de verificação das suas referências bibliográficas.
 14 | 
 15 | No momento, as verificações programadas neste projeto seguem a padronização definida no documento [Formatos de Monografias, Dissertações e Teses do PPGCC](http://www.pucrs.br/politecnica-prov/wp-content/uploads/sites/166/2018/10/padrao_teses_dissertacoes_monografias_PPGCC_18102018.pdf), como também o [Documento Auxiliar](https://github.com/ppgcc/DocumentosPPGCC) criado pelo mesmo representante discente. Ele não está configurado para verificar as adequações em relação às bibliografias utilizadas em templates da ACM, IEEE, Springer e etc. Mas futuramente, pode ser trabalhado  para contemplar demais templates.
 16 | 
 17 | **ATENÇÃO!**
 18 |   - Antes de fazer o uso deste projeto, leia a seção [Utilização](#utilização).
 19 |   - E também assista ao vídeo no [YouTube](https://www.youtube.com/watch?v=qU_Uc38Y7pU&feature=youtu.be), onde explico como executar o Script no Google Colab passo-a-passo.
 20 |   - O arquivo principal deste projeto é o script **`grb.py`**, que é o responsável por realizar todo o processamento e verificações do seu **`arquivo.bib`**, como também gerar os **`relatórios de erros`**.
 21 |   - Para fazer uso do **grb.py** você não precisa mexer nele. A sua edição só é estimulada, caso pretenda contribuir com melhorias e refatorações no código. Caso contrário, qualquer mudança pode ocasionar mau funcionamento na geração dos **`relatórios de erros`** e do **`novo_arquivo.bib`**.
 22 | 
 23 | ___
 24 | ## Tabela de Conteúdos
 25 | 
 26 | - [Utilização](#utilização)
 27 | - [Resumo das Funcionalidades](#resumo-das-funcionalidades)
 28 | - [Configuração do Ambiente Python](#configuração-do-ambiente-python)
 29 | 	- [Ambiente Virtual Python](#ambiente-virtual-python)
 30 | 		- [Configurando o Ambiente](#configurando-o-ambiente)
 31 | - [Dependências do Projeto](#dependências-do-projeto)
 32 | - [Entendendo o GRB](#entendendo-o-grb)
 33 | - [Utilizando o GRB](#utilizando-o-grb)
 34 | 	- [Possíveis Situações de Relatórios de Erros](#possíveis-situações-de-relatórios-de-erros)
 35 | 		- [Erro nos parâmetros do script](#erro-nos-parâmetros-do-script)
 36 | 		- [Erro no arquivo BIB](#erro-no-arquivo-bib)
 37 | 	- [Principais Relatórios](#principais-relatórios)
 38 | 		- [Relatório com os Avisos](#relatório-com-os-avisos)
 39 | 		- [Novo arquivo BIB](#novo-arquivo-bib)
 40 | 		- [Relatório 'PARA-BÉNS'](#relatório-para-béns)
 41 | - [Agradecimentos](#agradecimentos)
 42 | - [Contribuindo](#contribuindo)
 43 | 
 44 | ___
 45 | 
 46 | ## Utilização
 47 | 
 48 | Você pode optar por fazer o uso deste projeto de duas formas:
 49 | 1. Fazer o download deste repositório (ou clone) em seu ambiente (computador). Desta forma, você precisará realizar toda a configuração e instalação de pacotes listadas nas próximas duas seções:
 50 | 	- [Configuração do Ambiente Python](#configuração-do-ambiente-python)
 51 | 	- [Dependências do Projeto](#dependências-do-projeto)
 52 | 2. Ou você pode utilizar a versão criada no Google Colab. Desta forma, a execução ficará dentro do ambiente do Colab e você NÃO precisará fazer download ou clone deste projeto para a sua máquina. Para esta versão, clique no botão `Open in Colab`, localizado no topo deste README.md. A partir desta escolha, para entender o funcionamento deste projeto, você pode ir direto para as seções:
 53 | 	- [Entendendo o GRB](#entendendo-o-grb)
 54 | 	- [Utilizando o GRB](#utilizando-o-grb)
 55 |   - Caso opte pelo uso no Colab, lembre-se de salvar o arquivo `GoogleColab_GRB.ipynb` para você em seu Drive ou de forma local. Pois senão tudo o que você fizer neste arquivo, após o fechamento da aba, será perdido. Ou seja, tanto os relatórios gerados quanto os arquivos .bib com as correções.
 56 | 
 57 | ___
 58 | 
 59 | ## Resumo das Funcionalidades
 60 | 
 61 | Resumidamente, as principais funcionalidades desse **grb.py** são:
 62 | 
 63 | - Gerar relatórios informativos sobre a adequação do seu `arquivo.bib` em relação às regras de padronização definidas no documento de Formatos de Monografias, Dissertações e Teses do PPGCC.
 64 | - Geração de um `novo_arquivo.bib` (baseado no seu `arquivo.bib` de referências original) com as seguintes correções:
 65 |   - serão inseridos os campos faltantes junto à palavra 'MISSING'. Por exemplo, se o **grb.py** identificar que para a sua referência 'X', do tipo '@article', a informação do 'mês' for obrigatória e no seu `arquivo.bib` original a tag 'month' não constar, o **grb.py** irá regerar essa referência 'X', adicionando a tag `month='MISSING'`. Desta forma, basta abrir o `novo_arquivo.bib` gerado, procurar pelas palavras 'MISSING' e no lugar, inserir a informação correta referente àquela citação.
 66 |   - retira a informação de ano, caso ela conste nos nomes das conferências, jornals, editoras de livros e etc. Existe um campo específico para a informação de ano `year={}`, portanto ela não tem que se repetir junto aos nomes dos eventos. As tags que passam por esta verificação são: `{publisher, journal, booktitle, school e institution}`.
 67 |   - para as palavras acrônimas ('IEEE', 'ACM', ..., por exemplo), que constam nos nomes das conferências, jornals, editoras de livros e etc., são inseridas chaves `{ }` na determinada palavra, quando ela já não possuir. Isso serve para que quando o LaTeX for compilar, ele não retire o 'formato uppercase' da palavra. Caso contrário, mesmo que no seu `arquivo.bib` a palavra estiver correta (`ACM`, por exemplo), quando o LaTeX compilar, ele vai gerar `Acm`. Por isso a necessidades do uso das chaves `{ }`, nestas palavras. As tags que passam por esta verificação são: `{publisher, journal, booktitle, school e institution}`.
 68 |   - realiza a capitalização dos campos referentes aos nomes das conferências, jornals, editoras de livros e etc. As tags que passam por esta verificação são: `{publisher, journal, booktitle, school e institution}`. No entanto, há algumas restrições de funcionamento correto para:
 69 |   	- somente nomes escritos nos idiomas `português`, `inglês`, `espanhol` e `alemão`. Pois o **grb.py** precisa realizar o uso e download de `STOPWORDS` do determinado idioma. Estes idiomas foram configurados, por terem sido os mais identificados nos testes realizados com os `arquivos.bib` enviados para testes por demais colegas do PPGCC.
 70 | 	- além disso, os valores destas tags precisam estar entre aspas duplas `" "`, pois caso elas estejam entre chaves `{ }` (por exemplo, `publisher={Texto...}`), o **grb.py** não conseguirá capitalizar. Portanto, caso o relatório identifique que determinado campo não está capitalizado e você perceber que ele está, confira se esta tag não encontra-se entre chaves `{ }` e em caso positivo, substitua por `" "`, para que o **grb.py** consiga validar a capitalização corretamente.
 71 | 
 72 | Ressaltando que estas verificações podem se comportar de maneira inesperada, caso as informações que constem nas tags não estejam adequadas. Por exemplo, se o nome da conferência `Proceedings of the ACM/IEEE International Conference on Software Engineering`, estiver escrita de forma incorreta, com palavras unidas, por exemplo: `Proceedings of the ACM/IEEEInternational Conference on SoftwareEngineering`, a verificação vai considerar como se fosse apenas uma palavra, resultando em `Proceedings of the Acm/ieeeinternational Conference on Softwareengineering`.
 73 | 
 74 | Além disso, caso alguma palavra 'stopword' esteja com a identificação de acentuação (por exemplo: `n{\~a}o`), ela não será caracterizada como tal (stopword) e portanto sofrerá capitalização (`N{\~a}o`). Por este e outros motivos, sempre consulte o relatório gerado e verifique as mensagens. Pois nestes exemplos, haverá informações de que estas conferências não estavam com seu nome capitalizado. E portanto, acesse o `novo_arquivo.bib` e verifique se o **grb.py** realizou a capitalização de forma adequada. Isso vale para todas as correções listadas acima, que o **grb.py** realiza de forma automatizada.
 75 | 
 76 | ___
 77 | 
 78 | ## Configuração do Ambiente Python
 79 | 
 80 | Para que você possa executar este projeto, será necessário ter instalado em sua máquina o `Python3` com o gerenciador de pacotes `pip`. Se você já possui o ambiente configurado, pode pular este tópico.
 81 | 
 82 | ### Ambiente Virtual Python
 83 | 
 84 | Para quem ainda não tem instalado o Python3, sugiro fazer uso do `virtualenv`. É uma forma de criar ambientes virtuais para o uso do Python, com o objetivo de deixar as modificações e pacotes em um ambiente local e não global, evitando assim, 'bagunçar' as configurações originais do seu ambiente Python, além de não deixar 'sujeira' de configurações e pacotes instalados. Você pode configurar vários ambientes virtuais, de acordo com o que tiver desenvolvendo e desta forma, os pacotes instalados ficam específicos para cada ambiente.
 85 | 
 86 | #### Configurando o Ambiente
 87 | 
 88 | Os passos descritos a seguir, foram executados na minha máquina (Windows 10) e em uma VM Linux (Ubuntu 19.10). No entanto, para cada computador, poderá haver nuances que não serão contempladas nestas simples instruções.
 89 | 
 90 | Instale o Python3 (com o `pip`) seguindo os passos de acordo com o seu Sistema Operacional (SO):
 91 | 
 92 | - [Linux](https://python.org.br/instalacao-linux/)
 93 | - [MacOS](https://python.org.br/instalacao-mac/)
 94 | - [Windows](https://python.org.br/instalacao-windows/)
 95 | 
 96 | Com o Python3 e o `pip` devidamente instalados, abra o Terminal do seu SO e inicie o processo de criação do seu ambiente virtual. Primeiro instale o pacote para criar os ambientes virtuais:
 97 | ```
 98 | pip install virtualenv
 99 | ```
100 | 
101 | Em seguida, crie uma pasta para armazenar as suas máquinas virtuais (você pode fazer a criação da pasta pela interface gráfica do seu SO, se preferir):
102 | ```
103 | C:\Users\user\Desktop> mkdir <virtualenv>
104 | ```
105 | Substitua `<virtualenv>` pelo nome da pasta. Neste exemplo, a pasta será `virtualenv`. Lembre-se do caminho aonde você está com a linha de execução do seu Terminal (`C:\Users\user\Desktop`, por exemplo).
106 | 
107 | Crie um ambiente virtual para executar o Python:
108 | ```bash
109 | C:\Users\user\Desktop> virtualenv virtualenv\virtual_1
110 | ```
111 | 
112 | Ative o ambiente virtual:
113 | ```bash
114 | virtualenv\virtual_1\Scripts\activate
115 | ```
116 | 
117 | Agora, antes da linha de comando, aparecerá um flag `(virtual_1)` dizendo que você está usando o virtualenv 'virtual_1':
118 | ```bash
119 | (virtual_1) C:\Users\user\Desktop>
120 | ```
121 | A partir daqui, você pode começar a instalar as bibliotecas que desejar. Para este projeto, vamos fazer uso do gerenciador `pip` para instalar alguns pacotes.
122 | 
123 | ___
124 | 
125 | ## Dependências do projeto
126 | 
127 | Neste momento, acesse o local aonde o projeto foi baixado, por exemplo:
128 | 
129 | ```bash
130 | (virtual_1) C:\Users\user\Desktop> cd GenerateReportBib
131 | (virtual_1) C:\Users\user\Desktop\GenerateReportBib>
132 | ```
133 | 
134 | Para que o projeto funcione, antes você precisa instalar alguns pacotes. Para facilitar este processo, execute o comando abaixo que ele importará todos os pacotes de uma só vez:
135 | 
136 | ```bash
137 | (virtual_1) C:\Users\user\Desktop\GenerateReportBib> pip install -r requirements.txt
138 | ```
139 | 
140 | * Os pacotes listados no `requirements.txt` foram suficientes para que o projeto funcionasse no Windows 10 e no Ubuntu 19.10, no entanto, ao executar o GRB, caso seja necessário a instalação de mais algum pacote, aparecerá listado no Terminal, o seu respectivo nome. A partir dele, faça a instalação utilizando o `pip`.
141 | 
142 | Em seguida instale o pacote de `stopwords` da biblioteca `NLTK`. Este pacote serve para fazer a verificação e correção de capitalização nos campos referentes a nomes de conferências, journals, editoras e etc:
143 | 
144 | ```bash
145 | (virtual_1) C:\Users\user\Desktop\GenerateReportBib> python -m nltk.downloader stopwords
146 | ```
147 | 
148 | ___
149 | 
150 | ## Entendendo o GRB
151 | 
152 | Se todos os passos anteriores foram executados sem problemas, você já pode fazer uso deste projeto (**GRB.py**).
153 | 
154 | Antes de iniciar, preste bastante atenção na estrutura de pastas do projeto. Ela é importante para que o funcionamento do **grb.py** ocorra de maneira correta. Entenda a organização de pastas:
155 | 
156 | - **_OriginalBIB:_** É nesta pasta que você deve inserir o seu `arquivo.bib` original.
157 | - **_GenerateReports:_** É nesta pasta que os relatórios com a listagem dos erros encontrados do seu `arquivo.bib` serão apresentados. A cada nova execução do **grb.py**, um novo relatório será gerado com a data e horário da execução.
158 | - **_GenerateBIB:_** É nesta pasta que os novos arquivos .bib serão gerados. Após a execução do **grb.py**, caso o seu bib não esteja com problemas de compilação, será gerado um `novo_arquivo.bib` aqui, com a mesma data e horário do relatório correspondente a mesma execução.
159 | - **_screenshots_:** Pasta com as imagens de exemplos que constam neste arquivo `README.md`.
160 | 
161 | Dentro das pastas seguem arquivos utilizados para testar o funcionamento do **grb.py**. Depois de tudo configurado, você pode executar o **grb.py** sem realizar nenhuma alteração e deve ser gerado um `relatório.html` e um `novo_arquivo.bib` com as informações de teste.
162 | 
163 |   - O arquivo principal **`grb.py`** é o script que realiza todo o processamento e verificações do seu `arquivo.bib`.
164 |   - Para fazer uso do **grb.py** você não precisa mexer nele. A sua edição só é estimulada, caso pretenda contribuir com melhorias e refatorações no código. Caso contrário, qualquer mudança pode ocasionar mau funcionamento na geração dos `relatórios de erros` e do `novo_arquivo.bib`.
165 | 
166 | ___
167 | 
168 | ## Utilizando o GRB
169 | 
170 | Para fazer uso do **grb.py**, inicialmente copie o seu `arquivo.bib` para dentro da pasta `OriginalBIB`.
171 | 
172 | A partir desse momento, será necessário configurar alguns parâmetros que serão utilizados para executar o **grb.py**. Preencha os parâmetros corretamente, seguindo as restrições de preenchimento especificadas a seguir:
173 | 
174 | - **\<arquivo.bib>:** Preencha com o nome do seu `arquivo.bib` original que precisa, obrigatoriamente, estar localizado na pasta `OriginalBIB`.
175 | 
176 | - **\<idioma>:** Idioma da sua Dissertação ou Tese. Para PORTUGUÊS utilize: `pt` / Para INGLÊS utilize: `en`
177 | 
178 | - **\<tipo>:** Tipo das suas referências. Para estilos NUM ou ALPHA utilize: `num` ou `alpha` / Para o estilo APALIKE utilize: `apa`
179 | 
180 | Com os parâmetros devidamente configurados, você já pode fazer a execução do **grb.py**. Para isso, abra o Terminal na linha de comando do seu projeto e execute o **grb.py** substituindo os valores `<parametro>` pelas suas informações:
181 | 
182 | ```bash
183 | (virtual_1) C:\Users\user\Desktop\GenerateReportBib>python grb.py <arquivo.bib> -L <idioma> -T <tipo>
184 | ```
185 | 
186 | Exemplo de execução:
187 | ```
188 | python grb.py referencesTest.bib -L en -T apa
189 | ```
190 | 
191 | As tags `-L` e `-T` precisam ser especificadas junto com a execução, pois elas identificam a linguagem (idioma: pt ou en) da sua dissertação ou tese e o tipo dela (num , alpha ou apa), respectivamente.
192 | 
193 | Por exemplo, se após a execução do **grb.py** aparecer no seu Terminal mensagens parecidas com:
194 | 
195 | ```
196 | LANGUAGE:  en
197 | TYPE_REFERENCES:  apa
198 | FILE_NAME:  referencesTest.bib
199 | Exporting to referencesTest_Report_2020-05-20_19-20-39.html
200 | ```
201 | 
202 | Isso significa que o **grb.py** executou sem problemas. No entanto, isso não indica que o processo encerra aqui.
203 | 
204 | Dependendo de como estiver o seu `arquivo.bib`, os primeiros relatórios servirão para que você arrume alguns detalhes do seu `arquivo.bib` e execute novamente o **grb.py**.
205 | 
206 | O **grb.py** gera mais de um tipo de relatório, portanto, para verificar quais foram os problemas encontrados, acesse a pasta `GenerateReports` e busque pelo arquivo `report.html`. O nome do relatório é gerado com a data e horário da execução do **grb.py**, por exemplo: `_Report_2020-05-19_15-11-05.html`.
207 | 
208 | Por padrão, este arquivo é gerado com extensão `.html` dentro da pasta `GenerateReports`. No entanto, se no momento da conversão para `.html`, o `arquivo.bib` original estiver em uma codificação diferente de `utf-8`, será apresentado um erro no console do Terminal (especificando o/os caracter(es) que não foram identificados pela codificação `utf-8`). Desta forma, o relatório ainda será gerado, só que não mais no formato `.html`, mas sim no formato `.md (markdown)`, na mesma pasta dos relatórios: `GenerateReports`.
209 |   - *Sugestão:* caso seja gerado o arquivo em formato `.md`, você pode utilizar a ferramenta [`Visual Studio Code (VSC)`](https://code.visualstudio.com/) para abri-lo. Depois que abrir o arquivo no VSC, procure por um botão com uma lupa, localizado no canto superior direito, que o arquivo será apresentado no formato visual 'compilado'.
210 | 
211 | - **DICA:** Depois que este **grb.py** gerar o seu primeiro `novo_arquivo.bib`, este novo arquivo já possuirá algumas correções, mas no relatório, ele listará todos os erros encontrados, mesmo que ele já tenha corrigido. A partir desse `novo_arquivo.bib` (com as correções realizadas pelo **grb.py**), passe ele no **grb.py** como se fosse o seu arquivo .bib original, pois desta forma, grande parte dos itens listados no primeiro relatório como erro, não serão mais listados no próximo relatório. A partir desse momento, você pode iniciar o processo de correção das suas referências.
212 | 
213 | ### Possíveis Situações de Relatórios de Erros
214 | 
215 | Neste tópico, serão apresentados algumas situações que podem gerar erro na execução do **grb.py**. No entanto, podem não ser as únicas.
216 | 
217 | #### Erro nos parâmetros do script
218 | 
219 | Enquanto os parâmetros da execução do **grb.py** não forem preenchidos corretamente, conforme instruções e restrições acima, o **grb.py** não funcionará. Será apresentado no console algumas mensagens informando quais parâmetros ele está esperando. Além disso, caso o `arquivo.bib` definido na linha de comando não for válido, o seguinte relatório de erro pode ser gerado:
220 | 
221 | ![](screenshots/reportErrorComLine.PNG)
222 | 
223 | #### Erro no arquivo BIB
224 | 
225 | Caso o seu `arquivo.bib` esteja com algum problema, o **grb.py** ficará gerando relatórios de erros (com mensagens específicas do determinado problema), até que o mesmo seja corrigido. Possíveis problemas no `arquivo.bib`, encontrados durante os testes, que geram problemas:
226 | 
227 | - Quando há labels de referências repetidos, como por exemplo `(olimar2020)`:
228 | 
229 | ```
230 | @article{olimar2020,
231 |   title={Título},
232 |   author={Borges, Olimar Teixeira},
233 |   journal={Journal},
234 |   volume={30},
235 |   pages={389-406},
236 |   month={Mar},
237 |   year={2020}
238 | }
239 | 
240 | @inproceedings{olimar2020,
241 |   title={Título},
242 |   author={Borges, Olimar Teixeira},
243 |   booktitle={Booktitle},
244 |   pages={389-406},
245 |   year={2020}
246 | }
247 | ```
248 | 
249 | - Os labels programados neste **grb.py** para serem verificados são: _`@book, @article, @inproceedings, @proceedings, @mastersthesis, @conference, @phdthesis, @techreport, @misc, @booklet, @inbook, @incollection`_. Qualquer outro label de referência que estiver dentro do `arquivo.bib`, gerará um relatório de erro específico. Esta restrição se deve, pois são estes labels que estão definidos na padronização utilizada no documento de referências do PPGCC. Demais tags não são previstas, portanto, não são tratadas. Para que o **grb.py** funcione, sugiro retirar ou atualizar por labels válidos, dentro do `arquivo.bib` e executar novamente o **grb.py**.
250 | 
251 | Veja um exemplo de relatório que não gera o `novo_arquivo.bib`, devido a alguns dos erros mencionados anteriormente:
252 | 
253 | ![](screenshots/reportErrorOriginalBIB.PNG)
254 | 
255 | ### Principais Relatórios
256 | 
257 | Quando tudo ocorrer de acordo, ao executar o **grb.py**, você conseguirá emitir relatórios de erros que servirão como auxílio para a correção das suas referências. Entenda os relatórios para que eles possam realmente ajudar você nesse processo de correção.
258 | 
259 | #### Relatório com os Avisos
260 | 
261 | Este relatório é o principal deste projeto! Ele é gerado quando não há mais erros nos parâmetros e nem no `arquivo.bib`.
262 | 
263 | Neste relatório serão apresentadas as validações que o **grb.py** realizou. Para cada uma das referências do `arquivo.bib` em que for identificada alguma inconsistência, será listada a referência e o aviso correspondente. Na coluna `Warning` constam as descrições destes avisos, que devem ser corrigidos. Veja o screenshot de exemplo de um relatório de avisos final:
264 | 
265 | ![](screenshots/reportWarning.PNG)
266 | 
267 | Entenda alguns dos possíveis `Warnings` que poderão ser gerados em seu relatório:
268 | 
269 | - _Type not implemented:_ Este erro é `muito importante`, pois ele IMPEDE que seja gerado o `novo_arquivo.bib`, enquanto o(s) label(s) que não são válidos, não forem retirados ou atualizados por labels válidos.
270 |   - **Dica:** Antes de iniciar as correções em seu `arquivo.bib`, corrija estes 'problemas' do tipo `Type not implemented`, deixando apenas os labels válidos e execute o **grb.py** novamente. Desta forma, o `novo_arquivo.bib` também será gerado e você poderá começar a usá-lo!
271 |   - O screenshot a seguir apresenta um relatório com erro de `_New .bib file was not generated! Invalid tags have been identified in your .bib._`, que foi gerado pelo _Type not implemented:_
272 |   ![](screenshots/bibNotGenerate.PNG)
273 | 
274 | - _Failed Month and Year: year={Mon, Year} check_: Em referências no estilo `apa`, para citações do tipo `@article`, o mês e ano precisam constar juntos dentro da mesma tag `year={Mon, Year}`. Informe o mês abreviado.
275 | - _Failed Month month={ Mon } check_: Em referências no estilo `num` ou `alpha`, para citações do tipo `@article`, a informação de `month={}` e `year={}` também são obrigatórias, mas no entanto, devem ser inseridas em suas respectivas tags individuais.
276 | - _The { tag } field should not contain the year information_: A informação do ano só deve constar dentro da tag `year={}` e não junto com o nome da Conferência ou Journal.
277 | - _Field { tag } is not capitalized_: Os nomes das conferências, jornals, editoras e etc, devem estar capitalizados, ou seja, sempre a primeira letra de cada palavra precisa estar em maiúsculo.
278 | - _Missing: { tags }_: As tags informadas dentro dos `{}` são de preenchimento obrigatório para a determinada citação.
279 | 	- Para cada tipo de citação (`num/alpha` ou `apa`) existem campos obrigatórios, entenda quais são:
280 | 		```python
281 | 		if TYPE_REFERENCES == 'num-alpha':
282 | 		    REQ = {
283 | 			'book': {'author', 'title', 'publisher', 'year', 'numpages'},
284 | 			'article': {'title', 'author', 'journal', 'volume', 'year', 'month', 'pages'},
285 | 			'inproceedings': {'title', 'author', 'booktitle', 'pages', 'year'},
286 | 			'conference': {'title', 'author', 'booktitle', 'pages', 'year'},
287 | 			'proceedings': {'title', 'author', 'booktitle', 'pages', 'year'},
288 | 			'mastersthesis': {'title', 'author', 'numpages', 'school', 'year'},
289 | 			'phdthesis': {'title', 'author', 'numpages', 'school', 'year'},
290 | 			'techreport': {'title', 'author', 'numpages', 'institution', 'year'},
291 | 			'misc': {'title', 'author', 'url', 'urlaccessdate'},
292 | 			'booklet': {'title', 'author', 'howpublished', 'address', 'year', 'numpages'},
293 | 			'inbook': {'title', 'author', 'year', 'pages', 'publisher', 'chapter'},
294 | 			'incollection': {'title', 'author', 'year', 'booktitle', 'publisher'}
295 | 		    }
296 | 		elif TYPE_REFERENCES == 'apa':
297 | 		    REQ = {
298 | 			'book': {'title', 'author', 'publisher', 'year'},
299 | 			'article': {'title', 'author', 'year', 'journal', 'pages', 'volume'},
300 | 			'inproceedings': {'title', 'author', 'booktitle', 'pages', 'organization', 'year'},
301 | 			'conference': {'title', 'author', 'booktitle', 'pages','organization', 'year'},
302 | 			'proceedings': {'title', 'author', 'booktitle', 'pages', 'year'},
303 | 			'mastersthesis': {'title', 'author', 'year', 'school', 'address'},
304 | 			'phdthesis': {'title', 'author', 'year', 'school', 'address'},
305 | 			'techreport': {'title', 'author', 'institution', 'year', 'type'},
306 | 			'misc': {'title', 'author', 'year', 'note', 'howpublished'},
307 | 			'booklet': {'title', 'author', 'howpublished', 'year'},
308 | 			'inbook': {'title', 'author', 'year', 'pages', 'publisher', 'chapter'},
309 | 			'incollection': {'title', 'author', 'year', 'booktitle', 'publisher', 'volume', 'pages', 'edition'}
310 | 		    }
311 | 		```
312 | 
313 | #### Novo arquivo BIB
314 | 
315 | Junto com este relatório dos avisos, é gerado um `novo_arquivo.bib` (quando não ocorrerem os problemas já descritos).
316 | 
317 | Dentro do `novo_arquivo.bib`, em relação aos três últimos erros listados anteriormente (_The { tag } field should not contain the year information_, _Field { tag } is not capitalized_ e _Missing: { tags }_), quando forem identificados, eles serão 'corrigidos' e apresentados dentro do arquivo. Em relação ao primeiro erro, o ano será retirado da informação da tag. No segundo, o nome que antes não estava capitalizado, será capitalizado. E no terceiro, as tags que estavam faltando, serão adicionadas junto ao valor `'MISSING'`. Desta forma, rapidamente será possível identificar quais tags precisam ser preenchidas com as informações obrigatórias.
318 | 
319 | Por exemplo, caso no `arquivo.bib` original conste a seguinte referência, com a configuração `en` e `num` ou `alpha`:
320 | 
321 | ```
322 | @book{LabelDaCitacao,
323 |   title={Computers as components: principles of embedded computing system design},
324 |   author={W. Wolf},
325 |   year={2001},
326 |   publisher={Morgan kaufmann publishers},
327 |   address={New York, EUA}
328 | }
329 | ```
330 | 
331 | No arquivo de relatório será apresentada para esta referência a mensagem: `Missing: {'numpages'}`. Já que os campos obrigatórios para livros no estilo `num` ou `alpha` são: `{'author', 'title', 'publisher', 'year', 'numpages'}`. Além disso, a tag `publisher` deve estar capitalizada. Desta forma, será gerada no `novo_arquivo.bib` a seguinte referência:
332 | 
333 | ```
334 | @book{LabelDaCitacao,
335 |   title={Computers as components: principles of embedded computing system design},
336 |   author={W. Wolf},
337 |   year={2001},
338 |   address={New York, EUA},
339 |   publisher={Morgan Kaufmann Publishers},
340 |   numpages={MISSING}
341 | }
342 | ```
343 | 
344 | Para cada referência com campos faltantes, será gerada uma tag correspondente neste novo arquivo, com: `tag={MISSING}`.
345 | 
346 | #### Relatório PARA-BÉNS
347 | 
348 | Este relatório será gerado quando não houver inconsistências definidas neste projeto. Ele não é, necessariamente, uma garantia de que suas referências estão totalmente de acordo. É de responsabilidade do(a) pesquisador(a) verificar suas próprias citações. A seguir, veja o screenshot deste relatório:
349 | 
350 | ![](screenshots/reportCongrats.PNG)
351 | 
352 | ___
353 | 
354 | ## Agradecimentos
355 | Agradecimento ao colega Alumni de Mestrado do programa e amigo [Pedro Ballester](https://github.com/Ballester) pela disponibilização do código embrião deste projeto!
356 | 
357 | ___
358 | 
359 | ## Contribuindo
360 | Se você acha este projeto útil e gostaria de contribuir com ele, fique à vontade em fazer alterações e refatorações no código e em seguida abra **[Pull requests](https://help.github.com/pt/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request)** para se tornarem parte oficial deste projeto.
361 | 


--------------------------------------------------------------------------------
/find_errors_bib.py:
--------------------------------------------------------------------------------
 1 | from pybtex.database import parse_file
 2 | 
 3 | bib_data = parse_file('references.bib')
 4 | LANGUAGE = 'eng'
 5 | # MONTHS holds the month abbreviations that check_article_year() accepts for LANGUAGE.
 6 | assert LANGUAGE in ['eng', 'pt']
 7 | if LANGUAGE == 'eng':
 8 |     MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
 9 | elif LANGUAGE == 'pt':
10 |     MONTHS = ['Jan', 'Fev', 'Mar', 'Abr', 'Mai', 'Jun', 'Jul', 'Ago', 'Set', 'Out', 'Nov', 'Dez']
11 | 
12 | # REQ maps each supported entry type to the set of fields check() requires for it.
13 | REQ = {
14 |     'article': {'title', 'author', 'journal', 'volume', 'year', 'pages'},
15 |     'inproceedings': {'title', 'author', 'booktitle', 'pages', 'year'},
16 |     'techreport': {'title', 'author', 'numpages', 'institution', 'year', 'type'},
17 |     'incollection': {'title', 'author', 'year', 'booktitle', 'publisher'},
18 |     'inbook': {'title', 'author', 'year', 'pages', 'publisher', 'chapter'},
19 |     'booklet': {'title', 'author', 'howpublished', 'address', 'year', 'numpages'},
20 |     'misc': {'title', 'author', 'url', 'urlaccessdate'},
21 |     'mastersthesis': {'title', 'author', 'numpages', 'school', 'year', 'type'},
22 |     'phdthesis': {'title', 'author', 'numpages', 'school', 'year', 'type'},
23 |     'book': {'author', 'title', 'publisher', 'year', 'numpages'}
24 | }
25 | 
26 | 
def check(bib):
    """Return the required fields that a bibliography entry does not provide.

    Args:
        bib: entry object exposing ``type``, ``key``, ``fields`` and
            ``persons`` (the latter two are mappings keyed by field name).

    Returns:
        The set of names from ``REQ[bib.type]`` found neither in
        ``bib.fields`` nor in ``bib.persons``.

    Raises:
        Exception: if ``bib.type`` has no entry in ``REQ``.

    Side effect: for ``article`` entries that have a ``year`` field, prints a
    warning when the value does not pass ``check_article_year``.
    """
    # On Python 3 ``keys()`` returns views, which do not support ``+``;
    # build the combined set explicitly instead.
    fields = set(bib.fields.keys()) | set(bib.persons.keys())
    if bib.type not in REQ:
        raise Exception('Type %s not implemented' % bib.type)

    req = REQ[bib.type].difference(fields)
    if bib.type == 'article' and 'year' in fields:
        if not check_article_year(bib.fields['year']):
            print(bib.key + ': Failed {Month year} check')

    return req
39 | 
def check_article_year(year):
    """Tell whether ``year`` has the form ``"<Month> <year>"``.

    The first whitespace-separated token must be one of the abbreviations in
    the module-level ``MONTHS`` list and the second must parse as an integer.

    Args:
        year: raw text of the entry's ``year`` field.

    Returns:
        True when both tokens are present and valid, False otherwise.
    """
    parts = year.split()
    try:
        month, year = parts[0], parts[1]
        MONTHS.index(month)
        int(year)
    except (IndexError, ValueError):
        # IndexError: fewer than two tokens.
        # ValueError: unknown month abbreviation or non-numeric year.
        return False

    return True
50 |         
51 | 
# Print one line per entry that lacks required fields.
for entry in bib_data.entries:
    bib = bib_data.entries[entry]

    req = check(bib)

    if len(req) > 0:
        # 'title' may itself be one of the missing fields, so it cannot be
        # indexed unconditionally.
        title = bib.fields['title'] if 'title' in bib.fields else '<no title>'
        print(entry, ': ', title, ' --- ', req)
61 | 
62 | 
63 | 
64 | 


--------------------------------------------------------------------------------
/grb.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import os
  3 | import shlex
  4 | import json
  5 | import shutil
  6 | import nltk
  7 | import argparse
  8 | 
  9 | from langdetect import detect
 10 | from pathlib import Path
 11 | from datetime import datetime
 12 | from datetime import date
 13 | from pybtex.database import parse_file, BibliographyDataError, Entry
 14 | from pybtex.database.input.bibtex import UndefinedMacro
 15 | from pybtex.scanner import TokenRequired
 16 | from pybtex.richtext import Text, Tag
 17 | from nltk.corpus import stopwords
 18 | 
 19 | now = datetime.now()
 20 | label_date = str(date.today()) + now.strftime("_%H-%M-%S")
 21 | 
 22 | #Stopwords for capitalization
 23 | #nltk.download('stopwords')
 24 | LIST_STOPWORDS_ENGLISH = stopwords.words('english')
 25 | LIST_STOPWORDS_ENGLISH.append('st')
 26 | LIST_STOPWORDS_ENGLISH.append('nd')
 27 | LIST_STOPWORDS_ENGLISH.append('rd')
 28 | LIST_STOPWORDS_PORTUGUESE = stopwords.words('portuguese')
 29 | LIST_STOPWORDS_SPANISH = stopwords.words('spanish')
 30 | LIST_STOPWORDS_GERMAN = stopwords.words('german')
 31 | LIST_STOPWORDS_GERMAN.append('and')
 32 | 
 33 | # Exception List
 34 | EXCEPTION_LIST = ['ArXiv', 'arXiv', 'arxiv', '-', '\&', '&']
 35 | 
 36 | #Create directories
 37 | path_current = os.getcwd() # Current directory
 38 | path_bib_original = os.path.join(path_current, "OriginalBIB")
 39 | path_reports = os.path.join(path_current, "GenerateReports")
 40 | path_bib = os.path.join(path_current, "GenerateBIB")
 41 | 
 42 | ## Load configuration
 43 | def main():
 44 | 
 45 |     LANGUAGES = {
 46 |     "en": "english",
 47 |     "pt": "portuguese",
 48 |     }
 49 | 
 50 |     TYPES = {
 51 |         "num": "num-alpha",
 52 |         "alpha": "num-alpha",
 53 |         "apa": "apa",
 54 |     }
 55 | 
 56 |     #arguments entered via the command line
 57 |     argparser = argparse.ArgumentParser(
 58 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter
 59 |     )
 60 |     argparser.add_argument(dest="filename")
 61 |     argparser.add_argument(
 62 |         "-L",
 63 |         "--language",
 64 |         choices=LANGUAGES.keys(),
 65 |         help="Select a language { 'en' or 'pt' }",
 66 |         default="english",
 67 |     )
 68 |     argparser.add_argument(
 69 |         "-T",
 70 |         "--type",
 71 |         choices=TYPES.keys(),
 72 |         help="Select a type { 'num', 'alpha' or 'apa' }",
 73 |         default="num",
 74 |     )
 75 | 
 76 |     #for test
 77 |     '''
 78 |     LANGUAGE = 'en'
 79 |     print('LANGUAGE: ', LANGUAGE)
 80 |     LANGUAGE = LANGUAGES[LANGUAGE]
 81 | 
 82 |     TYPE_REFERENCES = 'num'
 83 |     print('TYPE_REFERENCES: ', TYPE_REFERENCES)
 84 |     TYPE_REFERENCES = TYPES[TYPE_REFERENCES]
 85 | 
 86 |     FILE_NAME = 'referencesTest.bib'
 87 |     print('FILE_NAME: ', FILE_NAME)
 88 |     '''
 89 |     #production
 90 |     #'''
 91 |     args = argparser.parse_args()
 92 |     LANGUAGE = args.language
 93 |     print('LANGUAGE: ', LANGUAGE)
 94 |     LANGUAGE = LANGUAGES[LANGUAGE]
 95 | 
 96 |     TYPE_REFERENCES = args.type
 97 |     print('TYPE_REFERENCES: ', TYPE_REFERENCES)
 98 |     TYPE_REFERENCES = TYPES[TYPE_REFERENCES]
 99 | 
100 |     FILE_NAME = args.filename
101 |     print('FILE_NAME: ', FILE_NAME)
102 |     #'''
103 |     NAME_FILE_OUTPUT_REPORT_MD = FILE_NAME[:-4] +"_Report_" + label_date + ".md"
104 |     NAME_FILE_OUTPUT_REPORT_HTML = FILE_NAME[:-4] +"_Report_" + label_date + ".html"
105 |     NAME_FILE_OUTPUT_BIB = FILE_NAME[:-4] + "_" + label_date + ".bib"
106 | 
107 |     file = open("results_temp.txt","w+",encoding="utf-8")
108 | 
109 |     lineHeader1 = "# References Report: " + str(label_date) + '\r'
110 |     lineHeader2 = "## - Configurations: _Bib File:_ **"+ FILE_NAME +"** / _Language:_ **"+ LANGUAGE +"** / _Type References:_ **"+ TYPE_REFERENCES +"**\r\n"
111 |     file.write(lineHeader1)
112 |     file.write(lineHeader2)
113 | 
114 |     # ===========================================================
115 |     # If repeated bibliograhpy entry or others erros
116 |     msg_erros = ''
117 |     stop = True
118 |     read_error_bib = False
119 |     error_command_line = False
120 |     file_found = False
121 |     #pass_language = False
122 | 
123 |     count_repeated_entry = 0
124 |     count_duplicate_field = 0
125 |     rep_tag_ant = ''
126 |     dup_field_ant = ''
127 |     while stop == True:
128 |         try:
129 |             '''
130 |             assert LANGUAGE in ['english', 'portuguese']
131 |             pass_language = True
132 |             assert TYPE_REFERENCES in ['apa', 'num-alpha']
133 |             '''
134 | 
135 |             if os.path.exists("refer_find_errors_generate_temp.bib") == False:
136 |                 with os.scandir(path_bib_original) as entries:
137 |                     for entry in entries:
138 |                         if entry.name == FILE_NAME:
139 |                             shutil.copyfile(os.path.join(path_bib_original, FILE_NAME), 'refer_find_errors_generate_temp.bib')
140 |                             file_found = True
141 |                             break
142 | 
143 |             if file_found == True:
144 |                 bib_data = parse_file('refer_find_errors_generate_temp.bib')
145 |             else:
146 |                 error_command_line = True
147 |                 msg_erros += '- ## The name of the .bib file set as a parameter on the command line was not found in the bib file folder. '
148 |                 msg_erros += 'Check if it is actually in the folder (OriginalBIB) or if the file name was spelled correctly in the parameter.\r'
149 | 
150 |             stop = False
151 |         except FileNotFoundError as identifier:
152 |             error_command_line = True
153 |             msg_erros += '- ## The name of the .bib file set as a parameter on the command line was not found in the bib file folder. '
154 |             msg_erros += 'Check if it is actually in the folder (OriginalBIB) or if the file name was spelled correctly in the parameter.\r'
155 |             stop = False
156 |             '''
157 |             except AssertionError as identifier:
158 |                 error_command_line = True
159 | 
160 |                 if pass_language == True:
161 |                     msg_erros += '- ## The value defined in the parameter of your reference type is not valid. '
162 |                     msg_erros += 'Valid values: [`num`, `alpha` or `apa`].\r'
163 |                 else:
164 |                     msg_erros += '- ## The value defined in your reference language parameter is not valid. '
165 |                     msg_erros += 'Valid values: [`en` or `pt`].\r'
166 | 
167 |                 stop = False
168 |             '''
169 |         except BibliographyDataError as identifier:
170 |             read_error_bib = True
171 | 
172 |             msg_erros += '- ## ' + str(identifier) + '\r'
173 | 
174 |             if 'repeated bibliograhpy entry' in str(identifier):
175 |                 tag_rep = str(identifier).split(':', 1)
176 | 
177 |                 if rep_tag_ant == tag_rep[1].strip():
178 |                     count_repeated_entry+=1
179 |                 else:
180 |                     count_repeated_entry=1
181 | 
182 |                 rep_tag_ant = tag_rep[1].strip()
183 |                 #print(tag_rep_ant,'\r')
184 | 
185 |                 #input file
186 |                 fin = open("refer_find_errors_generate_temp.bib", "rt",encoding="utf-8")
187 |                 old = str(tag_rep[1]).strip()+str(",")
188 |                 new = str(tag_rep[1]).strip()+str(count_repeated_entry)+str(",")
189 |                 contents = fin.read().replace(old, new, count_repeated_entry)
190 |                 fin.close()
191 | 
192 |                 fin = open("refer_find_errors_generate_temp.bib", "w+",encoding="utf-8")
193 |                 fin.write(contents)
194 |                 fin.close()
195 | 
196 |             elif 'has a duplicate' in str(identifier):
197 |                 arr_msg = str(identifier).split(' ')
198 |                 tag = arr_msg[3]
199 |                 dup_field = arr_msg[7]
200 | 
201 |                 if dup_field_ant == dup_field.strip():
202 |                     count_duplicate_field+=1
203 |                 else:
204 |                     count_duplicate_field=1
205 | 
206 |                 dup_field_ant = dup_field.strip()
207 | 
208 |                 old = dup_field
209 |                 new = dup_field+str(count_duplicate_field)
210 | 
211 |                 #input file
212 |                 fin = open("refer_find_errors_generate_temp.bib", "rt",encoding="utf-8")
213 |                 contents = fin.read()
214 |                 contents_partition = contents.partition(tag)
215 |                 res_replace = contents_partition[2].replace(old, new, count_duplicate_field)
216 |                 contents = contents_partition[0]+contents_partition[1]+res_replace
217 |                 contents
218 |                 fin.close()
219 | 
220 |                 fin = open("refer_find_errors_generate_temp.bib", "w+",encoding="utf-8")
221 |                 fin.write(contents)
222 |                 fin.close()
223 | 
224 |         except UndefinedMacro as identifier:
225 |             read_error_bib = True
226 |             erro = identifier.args[0]
227 |             msg_erros += '- ## ' + str(identifier) + '\r'
228 | 
229 |             #input file
230 |             fin = open("refer_find_errors_generate_temp.bib", "rt",encoding="utf-8")
231 |             contents = fin.read().replace(erro, 'JAN')
232 |             fin.close()
233 | 
234 |             fin = open("refer_find_errors_generate_temp.bib", "w+",encoding="utf-8")
235 |             fin.write(contents)
236 |             fin.close()
237 | 
238 |         except TokenRequired as identifier:
239 |             read_error_bib = True
240 |             erro = identifier.args[0]
241 |             value_new = ''
242 | 
243 |             msg_erros += '## - ' + str(identifier) + '\r'
244 | 
245 |             #input file
246 |             fin = open("refer_find_errors_generate_temp.bib", "rt",encoding="utf-8")
247 |             contents = fin.read().replace(erro, value_new)
248 |             fin.close()
249 | 
250 |             fin = open("refer_find_errors_generate_temp.bib", "w+",encoding="utf-8")
251 |             fin.write(contents)
252 |             fin.close()
253 |             stop = False
254 | 
255 |     if os.path.exists("refer_find_errors_generate_temp.bib"):
256 |         os.remove("refer_find_errors_generate_temp.bib")
257 | 
258 |     # ===========================================================
259 | 
260 |     if read_error_bib == True:
261 |         line = '# Error reading your .bib file!\r\n'
262 |         line += '### Errors may be related:\r'
263 |         line += '- the label of repeated references. Repeated entries will be listed below. '
264 |         line += 'Check the labels (tags) used before continuing to run the report.\r'
265 |         line += '- information for month={WRONG}. This field must always be filled in English (even if its volume is in Portuguese), '
266 |         line += 'nor can it be empty. '
267 |         line += '(_Accepted formats: [Jan, Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov, Dec]_)\r\n'
268 |         line += '## Errors found:\r'
269 | 
270 |         line_footer = '#### *Warning:* Correct repeated entries or month information in your original .bib file. '
271 |         line_footer += '#### Afterwards, run this script again to generate the error report. \r'
272 |         file.write(line)
273 |         file.write(msg_erros+'\r\n')
274 |         file.write(line_footer)
275 | 
276 |     elif error_command_line == True:
277 |         line = '# Error trying to run your command line.!\r\n'
278 |         line += '## Errors found:\r'
279 | 
280 |         line_footer = '#### Afterwards, run this script again to generate the error report. \r'
281 |         file.write(line)
282 |         file.write(msg_erros+'\r\n')
283 |         file.write(line_footer)
284 | 
285 |     else:
286 |         MONTHS_ENG_VALID = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
287 |         MONTHS_PORT_VALID = ['Jan', 'Fev', 'Mar', 'Abr', 'Mai', 'Jun', 'Jul', 'Ago', 'Set', 'Out', 'Nov', 'Dez']
288 | 
289 |         assert LANGUAGE in ['english', 'portuguese']
290 |         if LANGUAGE == 'english' and TYPE_REFERENCES == 'apa':
291 |             MONTHS = ['Jan,', 'Feb,', 'Mar,', 'Apr,', 'May,', 'Jun,', 'Jul,', 'Aug,', 'Sep,', 'Oct,', 'Nov,', 'Dec,']
292 | 
293 |         elif LANGUAGE == 'english' and TYPE_REFERENCES == 'num-alpha':
294 |             MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
295 | 
296 |         elif LANGUAGE == 'portuguese' and TYPE_REFERENCES == 'apa':
297 |             MONTHS = ['Jan,', 'Fev,', 'Mar,', 'Abr,', 'Mai,', 'Jun,', 'Jul,', 'Ago,', 'Set,', 'Out,', 'Nov,', 'Dez,']
298 | 
299 |         elif LANGUAGE == 'portuguese' and TYPE_REFERENCES == 'num-alpha':
300 |             MONTHS = ['Jan', 'Fev', 'Mar', 'Abr', 'Mai', 'Jun', 'Jul', 'Ago', 'Set', 'Out', 'Nov', 'Dez']
301 | 
302 |         assert TYPE_REFERENCES in ['apa', 'num', 'alpha', 'num-alpha']
303 |         if TYPE_REFERENCES == 'num-alpha':
304 |             REQ = {
305 |                 'book': {'author', 'title', 'publisher', 'year', 'numpages'},
306 |                 'article': {'title', 'author', 'journal', 'volume', 'year', 'month', 'pages'},
307 |                 'inproceedings': {'title', 'author', 'booktitle', 'pages', 'year'},
308 |                 'conference': {'title', 'author', 'booktitle', 'pages', 'year'},
309 |                 'proceedings': {'title', 'author', 'booktitle', 'pages', 'year'},
310 |                 'mastersthesis': {'title', 'author', 'numpages', 'school', 'year'},
311 |                 'phdthesis': {'title', 'author', 'numpages', 'school', 'year'},
312 |                 'techreport': {'title', 'author', 'numpages', 'institution', 'year'},
313 |                 'misc': {'title', 'author', 'url', 'urlaccessdate'},
314 |                 'booklet': {'title', 'author', 'howpublished', 'address', 'year', 'numpages'},
315 |                 'inbook': {'title', 'author', 'year', 'pages', 'publisher', 'chapter'},
316 |                 'incollection': {'title', 'author', 'year', 'booktitle', 'publisher'}
317 |             }
318 |         elif TYPE_REFERENCES == 'apa':
319 |             REQ = {
320 |                 'book': {'title', 'author', 'publisher', 'year'},
321 |                 'article': {'title', 'author', 'year', 'journal', 'pages', 'volume'},
322 |                 'inproceedings': {'title', 'author', 'booktitle', 'pages', 'organization', 'year'},
323 |                 'conference': {'title', 'author', 'booktitle', 'pages', 'organization', 'year'},
324 |                 'proceedings': {'title', 'author', 'booktitle', 'pages', 'year'},
325 |                 'mastersthesis': {'title', 'author', 'year', 'school', 'address'},
326 |                 'phdthesis': {'title', 'author', 'year', 'school', 'address'},
327 |                 'techreport': {'title', 'author', 'institution', 'year', 'type'},
328 |                 'misc': {'title', 'author', 'year', 'note', 'howpublished'},
329 |                 'booklet': {'title', 'author', 'howpublished', 'year'},
330 |                 'inbook': {'title', 'author', 'year', 'pages', 'publisher', 'chapter'},
331 |                 'incollection': {'title', 'author', 'year', 'booktitle', 'publisher', 'volume', 'pages', 'edition'}
332 |             }
333 | 
334 |         bib_data = parse_file(os.path.join(path_bib_original, FILE_NAME))
335 |         ##
336 | 
337 |         line1 = '| Error | Type |Tag | Title | Warning |' + '\r'
338 |         line2 = '|-:|:---:|:---:|:---:|:------:|' + '\r'
339 |         file.write(line1)
340 |         file.write(line2)
341 | 
342 |         congratulations = False
343 |         count=0
344 |         tag_inv_global = False
345 |         for entry in bib_data.entries:
346 |             bib = bib_data.entries[entry]
347 |             #print('entry: ============ ', entry)
348 |             msg, tag_inv = check(bib, REQ, MONTHS, TYPE_REFERENCES)
349 | 
350 |             if tag_inv == False:
351 |                 msg_gen, bib = regenerate_bib(bib, REQ)
352 |                 msg+=msg_gen
353 | 
354 |                 bib_data.entries[entry] = bib
355 |             else:
356 |                 tag_inv_global = True
357 | 
358 |             if len(msg) > 0:
359 |                 count+=1
360 |                 line = '| ' + str(count) +' |@'+ bib.type + '| {' + str(entry) + '} |' + str(bib.fields['title']) + ' | ' + msg + ' | ' + '\r'
361 |                 file.write(line)
362 | 
363 | 
364 |         if count != 0:
365 |             lineFooter1 = "# _Total references:_ **" + str(len(bib_data.entries)) + "** / _References with errors:_ **" + str(count) +"**\r\n"
366 |             lineFooter2 = '## _Check the errors shown below_ \r'
367 |             lineFooter3 = '- After correcting them, run the script again to ensure no new inconsistencies (or not).\r\n'
368 |             file.write(lineFooter1)
369 |             file.write(lineFooter2)
370 |             file.write(lineFooter3)
371 | 
372 |             if tag_inv_global == True:
373 |                 line5 = '## **New .bib file was not generated!** _Invalid tags have been identified in your .bib_.\r'
374 |                 line6 = '- In order to generate a .bib with the missing fields, first you need to correct these tags in your '
375 |                 line7 = 'original .bib and only after running the script again.\r'
376 |                 line8 = '- In the column _Warning_ you will enter which of the references is with: _**Type not implemented**_\r'
377 |                 line9 = '- Valid tags: _@book, @article, @inproceedings, @proceedings, @mastersthesis, @phdthesis, '
378 |                 line10 = '@techreport, @misc, @booklet, @inbook, @incollection_.'
379 |                 file.write(line5)
380 |                 file.write(line6)
381 |                 file.write(line7)
382 |                 file.write(line8)
383 |                 file.write(line9)
384 |                 file.write(line10)
385 | 
386 |         else:
387 |             congratulations = True
388 |             lineCongrat1 = '# Congratulations! \r\n'
389 |             lineCongrat2 = '## - No errors were identified in the fields. However, it is still necessary to check the standardization of your references, which this script does not guarantee.'
390 |             file.write(lineCongrat1)
391 |             file.write(lineCongrat2)
392 | 
393 |         if tag_inv_global == False and congratulations == False:
394 | 
395 |             file_bib = open(os.path.join(path_bib, NAME_FILE_OUTPUT_BIB),"w+", encoding="utf-8")
396 | 
397 |             file_bib.write(bib_data.to_string('bibtex'))
398 |             file_bib.close()
399 | 
400 |             if LANGUAGE == 'portuguese':
401 |                 fin = open(os.path.join(path_bib, NAME_FILE_OUTPUT_BIB), "rt+",encoding="utf-8")
402 |                 contents = ''
403 |                 for index, month in enumerate(MONTHS_ENG_VALID):
404 |                     month_pt = MONTHS_PORT_VALID[index].upper()
405 |                     contents += fin.read().replace(month, month_pt)
406 | 
407 |                 fin.close()
408 | 
409 |                 fin = open(os.path.join(path_bib, NAME_FILE_OUTPUT_BIB), "w+",encoding="utf-8")
410 |                 fin.write(contents)
411 |                 fin.close()
412 | 
413 |     file.close()
414 | 
415 |     shutil.copyfile("results_temp.txt", NAME_FILE_OUTPUT_REPORT_MD)
416 | 
417 |     command = "grip "+NAME_FILE_OUTPUT_REPORT_MD+" --export "+NAME_FILE_OUTPUT_REPORT_HTML
418 |     os.system(command)
419 | 
420 |     del_html = False
421 |     if os.path.exists(NAME_FILE_OUTPUT_REPORT_HTML):
422 |         file_html = open(NAME_FILE_OUTPUT_REPORT_HTML, "rt",encoding="utf-8")
423 |         contents = file_html.read()
424 | 
425 |         if '500 Internal Server Error' in contents:
426 |             del_html = True
427 |         file_html.flush()
428 |         file_html.close()
429 | 
430 |     else:
431 |         del_html = True
432 | 
433 |     if del_html == True:
434 |         shutil.copyfile(NAME_FILE_OUTPUT_REPORT_MD, os.path.join(path_reports, NAME_FILE_OUTPUT_REPORT_MD))
435 |     else:
436 |         shutil.copyfile(NAME_FILE_OUTPUT_REPORT_HTML, os.path.join(path_reports, NAME_FILE_OUTPUT_REPORT_HTML))
437 | 
438 |     if os.path.exists("results_temp.txt"):
439 |         os.remove("results_temp.txt")
440 |     if os.path.exists(NAME_FILE_OUTPUT_REPORT_MD):
441 |         os.remove(NAME_FILE_OUTPUT_REPORT_MD)
442 |     if os.path.exists(NAME_FILE_OUTPUT_REPORT_HTML):
443 |         os.remove(NAME_FILE_OUTPUT_REPORT_HTML)
444 | 
445 | 
# Field holding the venue/organization name that is normalized for each
# entry type handled by regenerate_bib().
_NAME_FIELD_BY_TYPE = {
    'book': 'publisher',
    'inbook': 'publisher',
    'article': 'journal',
    'inproceedings': 'booktitle',
    'proceedings': 'booktitle',
    'incollection': 'booktitle',
    'conference': 'booktitle',
    'mastersthesis': 'school',
    'phdthesis': 'school',
    'techreport': 'institution',
}


def regenerate_bib(bib, REQ):
    """Normalize an entry's name field and fill in its missing required fields.

    Args:
        bib: entry object exposing ``type``, ``fields`` and ``persons``.
        REQ: mapping of entry type to the set of required field names;
            ``bib.type`` must be one of its keys.

    Returns:
        ``(msg, bib)`` where ``msg`` concatenates the warnings produced for the
        entry's name field (year found in it, not capitalized) and ``bib`` is
        the same object, updated in place:

        * the name field is replaced by its capitalized form when
          ``check_parentheses_and_capitalize`` produced one, otherwise by the
          result of ``treatAmpersand`` when it contains ``&``;
        * every required field that is absent or empty is set to ``'MISSING'``.

    Raises:
        KeyError: if ``bib.type`` is not a key of ``REQ``.
    """
    present = {name.lower() for name in bib.fields.keys() if bib.fields[name] != ''}
    present.update(name.lower() for name in bib.persons.keys() if bib.persons[name] != '')
    missing = REQ[bib.type].difference(present)

    msg = ''

    # One table lookup replaces the per-type branches; it also covers
    # 'mastersthesis', which the former misspelled comparison never matched.
    field = _NAME_FIELD_BY_TYPE.get(bib.type)
    if field is not None and field in present:
        phrase = bib.fields[field]
        msg_year, phrase = find_year(phrase, field)
        msg += msg_year

        not_capitalized, phrase_cap = check_parentheses_and_capitalize(phrase)

        if len(phrase_cap) > 0:
            bib.fields[field] = phrase_cap
        elif '&' in phrase:
            bib.fields[field] = treatAmpersand(phrase)

        if not_capitalized:
            msg += 'Field { ' + field + ' } is not capitalized; '

    # Sorted so the placeholders are added in the same order on every run.
    for value in sorted(missing):
        bib.fields[value] = 'MISSING'

    return msg, bib
554 | 
555 | ##
def check(bib, req, months, type_references):
    """Validate one bibliography entry against the required-field table.

    Args:
        bib: entry object exposing ``type``, ``fields`` and ``persons``.
        req: mapping of entry type to the set of required field names.
        months: month tokens forwarded to ``check_article_year_month``.
        type_references: ``'apa'`` or ``'num-alpha'``.

    Returns:
        ``(msg, tag_inv)``. ``tag_inv`` is True when the entry type is not a
        key of ``req``; ``msg`` is then the "Type not implemented" warning.
        Otherwise ``msg`` concatenates the month/year warning for articles (if
        any) and the list of required fields that are not present, and is
        empty when nothing was found.
    """
    present = {name.lower() for name in bib.fields.keys()}
    present.update(name.lower() for name in bib.persons.keys())

    # Guard clause: unknown entry types are reported and not inspected further.
    if bib.type not in req.keys():
        return 'Type not implemented: [@' + bib.type + '] remove or replace; ', True

    warnings = []
    is_article = bib.type == 'article'

    if type_references == 'apa' and is_article and 'year' in present:
        if not check_article_year_month(bib.fields['year'], months, type_references):
            warnings.append('Failed Month and Year: year={Mon, Year} check; ')

    if type_references == 'num-alpha' and is_article and 'month' in present:
        if not check_article_year_month(bib.fields['month'], months, type_references):
            warnings.append('Failed Month month={ Mon } check; ')

    absent = req[bib.type].difference(present)
    if len(absent) > 0:
        warnings.append('Missing: ' + str(absent) + "; ")

    return ''.join(warnings), False
583 | 
def find_year(phrase, tag):
    """Detect a four-digit year (first digit 1-3) inside a field value.

    Args:
        phrase: text of the field.
        tag: field name, used only in the warning text.

    Returns:
        ``(msg, phrase)``. When nothing matches, ``msg`` is empty and
        ``phrase`` is returned unchanged. Otherwise ``msg`` is the warning
        listing the matches and every occurrence of the first match is removed
        from ``phrase``. Because the pattern starts with a greedy ``.*``, the
        match reported for a line is the last year-like run on that line.
    """
    years = re.findall('.*([1-3][0-9]{3})', phrase)
    if not years:
        return '', phrase

    warning = 'The {%s} field should not contain the year information:  %s remove; ' % (tag, str(years))
    return warning, phrase.replace(str(years[0]), '')
592 | 
def check_parentheses_and_capitalize(phrase):
    """Check that the words of a name field start with a capital letter.

    Parenthesized text is removed first. Each remaining word is then handled
    as follows (for hyphenated words only the part before the first hyphen is
    inspected; the rest is kept verbatim):

    * stopwords of the detected language and entries of ``EXCEPTION_LIST`` are
      kept as they are (any exception token containing ``&`` becomes ``&``);
    * the ``e-`` prefix (e-Business, e-Science, ...) and digits are kept;
    * all-uppercase words longer than one character are wrapped in braces;
    * a word starting with a lowercase letter is reported and its first letter
      is upper-cased (the rest of the word keeps its casing).

    Args:
        phrase: text of the field to check.

    Returns:
        ``(uncapitalized, phrase_cap)``. ``uncapitalized`` is True when at
        least one word had to be capitalized; ``phrase_cap`` is then the
        rebuilt phrase, and an empty string otherwise.
    """
    # Nothing to capitalize without letters; this also avoids running language
    # detection on digits/punctuation only.
    # NOTE(review): langdetect is assumed to reject such input - confirm.
    if not any(char.isalpha() for char in phrase):
        return False, ''

    language = detect(phrase)
    if language == 'pt':
        stop_list = LIST_STOPWORDS_PORTUGUESE
    elif language == 'es':
        stop_list = LIST_STOPWORDS_SPANISH
    elif language == 'de':
        stop_list = LIST_STOPWORDS_GERMAN
    else:
        # English, and the fallback for every other detected language.
        stop_list = LIST_STOPWORDS_ENGLISH

    #remove parenthesis
    phrase = re.sub(r"\((.*?)\)", ' ', phrase)

    uncapitalized = False
    rebuilt = []
    for token in phrase.split():
        if token in stop_list:
            rebuilt.append(token)
            continue
        if token in EXCEPTION_LIST:
            rebuilt.append("&" if "&" in token else token)
            continue

        # Split per word, so hyphen state never carries over to later words
        # and everything after the first hyphen is preserved.
        head, hyphen, tail = token.partition('-')
        suffix = hyphen + tail

        if head == '' or head == 'e' or head.isdigit() or head in EXCEPTION_LIST:
            rebuilt.append(token)
        elif head.isupper():
            if len(head) > 1 and not head.startswith('{'):
                rebuilt.append("{" + head + "}" + suffix)
            else:
                rebuilt.append(token)
        elif head[0].islower():
            uncapitalized = True
            # Only the first letter changes, so mixed-case words keep their
            # inner capitals.
            rebuilt.append(head[0].upper() + head[1:] + suffix)
        else:
            rebuilt.append(token)

    phrase_cap = ' '.join(rebuilt) if uncapitalized else ''

    return uncapitalized, phrase_cap
660 | 
def treatAmpersand(phrase):
    """Normalize ampersands in a field value.

    Parenthesized text is replaced by a space, the phrase is split on
    whitespace, and every word that contains ``&`` is replaced by a bare
    ``&``. The words are joined back with single spaces.

    Args:
        phrase: text of the field.

    Returns:
        The rebuilt phrase.
    """
    without_parentheses = re.sub(r"\((.*?)\)", ' ', phrase)
    return ' '.join(
        "&" if "&" in token else token
        for token in without_parentheses.split()
    )
675 | 
def check_article_year_month(fields, months, type_references):
    """Validate the month/year text of an article entry.

    For ``type_references == "apa"`` the value must have at least two
    whitespace-separated tokens: the last four characters of the first token
    must be an item of ``months`` (e.g. ``'Jan,'``) and the second token must
    parse as an integer.

    For any other reference type no validation is performed and the result is
    always True.

    Args:
        fields: raw text of the field to validate.
        months: list of accepted month tokens.
        type_references: reference style name.

    Returns:
        False when the APA check fails, True otherwise.
    """
    parts = fields.split()

    if type_references == "apa":
        try:
            month, year = parts[0][-4:], parts[1]
            months.index(month)
            int(year)
        except (IndexError, ValueError):
            # IndexError: fewer than two tokens.
            # ValueError: month not in ``months`` or non-numeric year.
            return False

    return True
690 | 
691 | if __name__ == "__main__":  # true only when this file is executed as a script, not when imported
692 |     main()
693 | 


--------------------------------------------------------------------------------
/nltk_config.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import nltk
4 | nltk.download('stopwords')  # asks NLTK to download its 'stopwords' resource each time this script runs


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pathlib
2 | grip
3 | markdown2
4 | click
5 | wkhtmltopdf
6 | nltk
7 | langdetect
8 | pybtex
9 | 


--------------------------------------------------------------------------------
/screenshots/bibNotGenerate.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ppgcc/GenerateReportBib/493c9683b008dd3c3588bf21dcb62946e7edf79b/screenshots/bibNotGenerate.PNG


--------------------------------------------------------------------------------
/screenshots/reportCongrats.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ppgcc/GenerateReportBib/493c9683b008dd3c3588bf21dcb62946e7edf79b/screenshots/reportCongrats.PNG


--------------------------------------------------------------------------------
/screenshots/reportErrorComLine.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ppgcc/GenerateReportBib/493c9683b008dd3c3588bf21dcb62946e7edf79b/screenshots/reportErrorComLine.PNG


--------------------------------------------------------------------------------
/screenshots/reportErrorOriginalBIB.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ppgcc/GenerateReportBib/493c9683b008dd3c3588bf21dcb62946e7edf79b/screenshots/reportErrorOriginalBIB.PNG


--------------------------------------------------------------------------------
/screenshots/reportWarning.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ppgcc/GenerateReportBib/493c9683b008dd3c3588bf21dcb62946e7edf79b/screenshots/reportWarning.PNG


--------------------------------------------------------------------------------