├── .gitignore ├── .travis.yml ├── COPYING.LESSER.md ├── COPYING.md ├── Makefile ├── PatternOmatic ├── __init__.py ├── api.py ├── ge │ ├── __init__.py │ ├── individual.py │ ├── population.py │ └── stats.py ├── nlp │ ├── __init__.py │ └── bnf.py └── settings │ ├── __init__.py │ ├── config.py │ ├── literals.py │ └── log.py ├── README.md ├── config.ini ├── patternomatic_logo.svg ├── requirements.txt ├── scripts ├── __init__.py └── patternomatic.py ├── setup.py └── tests ├── __init__.py ├── test_api.py ├── test_bnf.py ├── test_individual.py ├── test_population.py ├── test_script.py ├── test_settings.py └── test_stats.py /.gitignore: -------------------------------------------------------------------------------- 1 | # PatternOmatic non sources to ignore 2 | .idea/ 3 | /venv 4 | *pycache* 5 | .scannerwork/ 6 | .coverage 7 | coverage.xml 8 | build/ 9 | dist/ 10 | PatternOmatic.egg-info 11 | fil-result 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # ······················································································································ 2 | # PatternOmatic's CI & CD pipeline. Mainly Makefile based 3 | # 4 | # This file is part of PatternOmatic. 5 | # 6 | # Copyright © 2020 Miguel Revuelta Espinosa 7 | # 8 | # PatternOmatic is free software: you can redistribute it and/or 9 | # modify it under the terms of the GNU Lesser General Public License 10 | # as published by the Free Software Foundation, either version 3 of 11 | # the License, or (at your option) any later version. 12 | # 13 | # PatternOmatic is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU Lesser General Public License for more details. 
17 | # 18 | # You should have received a copy of the GNU Lesser General Public License 19 | # along with PatternOmatic. If not, see <https://www.gnu.org/licenses/>. 20 | # 21 | # ······················································································································ 22 | language: python 23 | 24 | addons: 25 | sonarcloud: 26 | organization: "revuel" 27 | 28 | python: 29 | - "3.7" 30 | 31 | if: tag IS blank 32 | 33 | script: 34 | - make libs 35 | - make coverage 36 | - make sonarcloud 37 | - if [ "$TRAVIS_BRANCH" == "master" ] && [ "$TRAVIS_PULL_REQUEST" == "false" ]; then make build; fi 38 | - if [ "$TRAVIS_BRANCH" == "master" ] && [ "$TRAVIS_PULL_REQUEST" == "false" ]; then make publish; fi 39 | -------------------------------------------------------------------------------- /COPYING.LESSER.md: -------------------------------------------------------------------------------- 1 | ### GNU LESSER GENERAL PUBLIC LICENSE 2 | 3 | Version 3, 29 June 2007 4 | 5 | Copyright (C) 2007 Free Software Foundation, Inc. 6 | <https://fsf.org/> 7 | 8 | Everyone is permitted to copy and distribute verbatim copies of this 9 | license document, but changing it is not allowed. 10 | 11 | This version of the GNU Lesser General Public License incorporates the 12 | terms and conditions of version 3 of the GNU General Public License, 13 | supplemented by the additional permissions listed below. 14 | 15 | #### 0. Additional Definitions. 16 | 17 | As used herein, "this License" refers to version 3 of the GNU Lesser 18 | General Public License, and the "GNU GPL" refers to version 3 of the 19 | GNU General Public License. 20 | 21 | "The Library" refers to a covered work governed by this License, other 22 | than an Application or a Combined Work as defined below. 23 | 24 | An "Application" is any work that makes use of an interface provided 25 | by the Library, but which is not otherwise based on the Library. 
26 | Defining a subclass of a class defined by the Library is deemed a mode 27 | of using an interface provided by the Library. 28 | 29 | A "Combined Work" is a work produced by combining or linking an 30 | Application with the Library. The particular version of the Library 31 | with which the Combined Work was made is also called the "Linked 32 | Version". 33 | 34 | The "Minimal Corresponding Source" for a Combined Work means the 35 | Corresponding Source for the Combined Work, excluding any source code 36 | for portions of the Combined Work that, considered in isolation, are 37 | based on the Application, and not on the Linked Version. 38 | 39 | The "Corresponding Application Code" for a Combined Work means the 40 | object code and/or source code for the Application, including any data 41 | and utility programs needed for reproducing the Combined Work from the 42 | Application, but excluding the System Libraries of the Combined Work. 43 | 44 | #### 1. Exception to Section 3 of the GNU GPL. 45 | 46 | You may convey a covered work under sections 3 and 4 of this License 47 | without being bound by section 3 of the GNU GPL. 48 | 49 | #### 2. Conveying Modified Versions. 50 | 51 | If you modify a copy of the Library, and, in your modifications, a 52 | facility refers to a function or data to be supplied by an Application 53 | that uses the facility (other than as an argument passed when the 54 | facility is invoked), then you may convey a copy of the modified 55 | version: 56 | 57 | - a) under this License, provided that you make a good faith effort 58 | to ensure that, in the event an Application does not supply the 59 | function or data, the facility still operates, and performs 60 | whatever part of its purpose remains meaningful, or 61 | - b) under the GNU GPL, with none of the additional permissions of 62 | this License applicable to that copy. 63 | 64 | #### 3. Object Code Incorporating Material from Library Header Files. 
65 | 66 | The object code form of an Application may incorporate material from a 67 | header file that is part of the Library. You may convey such object 68 | code under terms of your choice, provided that, if the incorporated 69 | material is not limited to numerical parameters, data structure 70 | layouts and accessors, or small macros, inline functions and templates 71 | (ten or fewer lines in length), you do both of the following: 72 | 73 | - a) Give prominent notice with each copy of the object code that 74 | the Library is used in it and that the Library and its use are 75 | covered by this License. 76 | - b) Accompany the object code with a copy of the GNU GPL and this 77 | license document. 78 | 79 | #### 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, taken 82 | together, effectively do not restrict modification of the portions of 83 | the Library contained in the Combined Work and reverse engineering for 84 | debugging such modifications, if you also do each of the following: 85 | 86 | - a) Give prominent notice with each copy of the Combined Work that 87 | the Library is used in it and that the Library and its use are 88 | covered by this License. 89 | - b) Accompany the Combined Work with a copy of the GNU GPL and this 90 | license document. 91 | - c) For a Combined Work that displays copyright notices during 92 | execution, include the copyright notice for the Library among 93 | these notices, as well as a reference directing the user to the 94 | copies of the GNU GPL and this license document. 
95 | - d) Do one of the following: 96 | - 0) Convey the Minimal Corresponding Source under the terms of 97 | this License, and the Corresponding Application Code in a form 98 | suitable for, and under terms that permit, the user to 99 | recombine or relink the Application with a modified version of 100 | the Linked Version to produce a modified Combined Work, in the 101 | manner specified by section 6 of the GNU GPL for conveying 102 | Corresponding Source. 103 | - 1) Use a suitable shared library mechanism for linking with 104 | the Library. A suitable mechanism is one that (a) uses at run 105 | time a copy of the Library already present on the user's 106 | computer system, and (b) will operate properly with a modified 107 | version of the Library that is interface-compatible with the 108 | Linked Version. 109 | - e) Provide Installation Information, but only if you would 110 | otherwise be required to provide such information under section 6 111 | of the GNU GPL, and only to the extent that such information is 112 | necessary to install and execute a modified version of the 113 | Combined Work produced by recombining or relinking the Application 114 | with a modified version of the Linked Version. (If you use option 115 | 4d0, the Installation Information must accompany the Minimal 116 | Corresponding Source and Corresponding Application Code. If you 117 | use option 4d1, you must provide the Installation Information in 118 | the manner specified by section 6 of the GNU GPL for conveying 119 | Corresponding Source.) 120 | 121 | #### 5. Combined Libraries. 
122 | 123 | You may place library facilities that are a work based on the Library 124 | side by side in a single library together with other library 125 | facilities that are not Applications and are not covered by this 126 | License, and convey such a combined library under terms of your 127 | choice, if you do both of the following: 128 | 129 | - a) Accompany the combined library with a copy of the same work 130 | based on the Library, uncombined with any other library 131 | facilities, conveyed under the terms of this License. 132 | - b) Give prominent notice with the combined library that part of it 133 | is a work based on the Library, and explaining where to find the 134 | accompanying uncombined form of the same work. 135 | 136 | #### 6. Revised Versions of the GNU Lesser General Public License. 137 | 138 | The Free Software Foundation may publish revised and/or new versions 139 | of the GNU Lesser General Public License from time to time. Such new 140 | versions will be similar in spirit to the present version, but may 141 | differ in detail to address new problems or concerns. 142 | 143 | Each version is given a distinguishing version number. If the Library 144 | as you received it specifies that a certain numbered version of the 145 | GNU Lesser General Public License "or any later version" applies to 146 | it, you have the option of following the terms and conditions either 147 | of that published version or of any later version published by the 148 | Free Software Foundation. If the Library as you received it does not 149 | specify a version number of the GNU Lesser General Public License, you 150 | may choose any version of the GNU Lesser General Public License ever 151 | published by the Free Software Foundation. 
152 | 153 | If the Library as you received it specifies that a proxy can decide 154 | whether future versions of the GNU Lesser General Public License shall 155 | apply, that proxy's public statement of acceptance of any version is 156 | permanent authorization for you to choose that version for the 157 | Library. 158 | -------------------------------------------------------------------------------- /COPYING.md: -------------------------------------------------------------------------------- 1 | ### GNU GENERAL PUBLIC LICENSE 2 | 3 | Version 3, 29 June 2007 4 | 5 | Copyright (C) 2007 Free Software Foundation, Inc. 6 | <https://fsf.org/> 7 | 8 | Everyone is permitted to copy and distribute verbatim copies of this 9 | license document, but changing it is not allowed. 10 | 11 | ### Preamble 12 | 13 | The GNU General Public License is a free, copyleft license for 14 | software and other kinds of works. 15 | 16 | The licenses for most software and other practical works are designed 17 | to take away your freedom to share and change the works. By contrast, 18 | the GNU General Public License is intended to guarantee your freedom 19 | to share and change all versions of a program--to make sure it remains 20 | free software for all its users. We, the Free Software Foundation, use 21 | the GNU General Public License for most of our software; it applies 22 | also to any other work released this way by its authors. You can apply 23 | it to your programs, too. 24 | 25 | When we speak of free software, we are referring to freedom, not 26 | price. Our General Public Licenses are designed to make sure that you 27 | have the freedom to distribute copies of free software (and charge for 28 | them if you wish), that you receive source code or can get it if you 29 | want it, that you can change the software or use pieces of it in new 30 | free programs, and that you know you can do these things. 
31 | 32 | To protect your rights, we need to prevent others from denying you 33 | these rights or asking you to surrender the rights. Therefore, you 34 | have certain responsibilities if you distribute copies of the 35 | software, or if you modify it: responsibilities to respect the freedom 36 | of others. 37 | 38 | For example, if you distribute copies of such a program, whether 39 | gratis or for a fee, you must pass on to the recipients the same 40 | freedoms that you received. You must make sure that they, too, receive 41 | or can get the source code. And you must show them these terms so they 42 | know their rights. 43 | 44 | Developers that use the GNU GPL protect your rights with two steps: 45 | (1) assert copyright on the software, and (2) offer you this License 46 | giving you legal permission to copy, distribute and/or modify it. 47 | 48 | For the developers' and authors' protection, the GPL clearly explains 49 | that there is no warranty for this free software. For both users' and 50 | authors' sake, the GPL requires that modified versions be marked as 51 | changed, so that their problems will not be attributed erroneously to 52 | authors of previous versions. 53 | 54 | Some devices are designed to deny users access to install or run 55 | modified versions of the software inside them, although the 56 | manufacturer can do so. This is fundamentally incompatible with the 57 | aim of protecting users' freedom to change the software. The 58 | systematic pattern of such abuse occurs in the area of products for 59 | individuals to use, which is precisely where it is most unacceptable. 60 | Therefore, we have designed this version of the GPL to prohibit the 61 | practice for those products. If such problems arise substantially in 62 | other domains, we stand ready to extend this provision to those 63 | domains in future versions of the GPL, as needed to protect the 64 | freedom of users. 
65 | 66 | Finally, every program is threatened constantly by software patents. 67 | States should not allow patents to restrict development and use of 68 | software on general-purpose computers, but in those that do, we wish 69 | to avoid the special danger that patents applied to a free program 70 | could make it effectively proprietary. To prevent this, the GPL 71 | assures that patents cannot be used to render the program non-free. 72 | 73 | The precise terms and conditions for copying, distribution and 74 | modification follow. 75 | 76 | ### TERMS AND CONDITIONS 77 | 78 | #### 0. Definitions. 79 | 80 | "This License" refers to version 3 of the GNU General Public License. 81 | 82 | "Copyright" also means copyright-like laws that apply to other kinds 83 | of works, such as semiconductor masks. 84 | 85 | "The Program" refers to any copyrightable work licensed under this 86 | License. Each licensee is addressed as "you". "Licensees" and 87 | "recipients" may be individuals or organizations. 88 | 89 | To "modify" a work means to copy from or adapt all or part of the work 90 | in a fashion requiring copyright permission, other than the making of 91 | an exact copy. The resulting work is called a "modified version" of 92 | the earlier work or a work "based on" the earlier work. 93 | 94 | A "covered work" means either the unmodified Program or a work based 95 | on the Program. 96 | 97 | To "propagate" a work means to do anything with it that, without 98 | permission, would make you directly or secondarily liable for 99 | infringement under applicable copyright law, except executing it on a 100 | computer or modifying a private copy. Propagation includes copying, 101 | distribution (with or without modification), making available to the 102 | public, and in some countries other activities as well. 103 | 104 | To "convey" a work means any kind of propagation that enables other 105 | parties to make or receive copies. 
Mere interaction with a user 106 | through a computer network, with no transfer of a copy, is not 107 | conveying. 108 | 109 | An interactive user interface displays "Appropriate Legal Notices" to 110 | the extent that it includes a convenient and prominently visible 111 | feature that (1) displays an appropriate copyright notice, and (2) 112 | tells the user that there is no warranty for the work (except to the 113 | extent that warranties are provided), that licensees may convey the 114 | work under this License, and how to view a copy of this License. If 115 | the interface presents a list of user commands or options, such as a 116 | menu, a prominent item in the list meets this criterion. 117 | 118 | #### 1. Source Code. 119 | 120 | The "source code" for a work means the preferred form of the work for 121 | making modifications to it. "Object code" means any non-source form of 122 | a work. 123 | 124 | A "Standard Interface" means an interface that either is an official 125 | standard defined by a recognized standards body, or, in the case of 126 | interfaces specified for a particular programming language, one that 127 | is widely used among developers working in that language. 128 | 129 | The "System Libraries" of an executable work include anything, other 130 | than the work as a whole, that (a) is included in the normal form of 131 | packaging a Major Component, but which is not part of that Major 132 | Component, and (b) serves only to enable use of the work with that 133 | Major Component, or to implement a Standard Interface for which an 134 | implementation is available to the public in source code form. A 135 | "Major Component", in this context, means a major essential component 136 | (kernel, window system, and so on) of the specific operating system 137 | (if any) on which the executable work runs, or a compiler used to 138 | produce the work, or an object code interpreter used to run it. 
139 | 140 | The "Corresponding Source" for a work in object code form means all 141 | the source code needed to generate, install, and (for an executable 142 | work) run the object code and to modify the work, including scripts to 143 | control those activities. However, it does not include the work's 144 | System Libraries, or general-purpose tools or generally available free 145 | programs which are used unmodified in performing those activities but 146 | which are not part of the work. For example, Corresponding Source 147 | includes interface definition files associated with source files for 148 | the work, and the source code for shared libraries and dynamically 149 | linked subprograms that the work is specifically designed to require, 150 | such as by intimate data communication or control flow between those 151 | subprograms and other parts of the work. 152 | 153 | The Corresponding Source need not include anything that users can 154 | regenerate automatically from other parts of the Corresponding Source. 155 | 156 | The Corresponding Source for a work in source code form is that same 157 | work. 158 | 159 | #### 2. Basic Permissions. 160 | 161 | All rights granted under this License are granted for the term of 162 | copyright on the Program, and are irrevocable provided the stated 163 | conditions are met. This License explicitly affirms your unlimited 164 | permission to run the unmodified Program. The output from running a 165 | covered work is covered by this License only if the output, given its 166 | content, constitutes a covered work. This License acknowledges your 167 | rights of fair use or other equivalent, as provided by copyright law. 168 | 169 | You may make, run and propagate covered works that you do not convey, 170 | without conditions so long as your license otherwise remains in force. 
171 | You may convey covered works to others for the sole purpose of having 172 | them make modifications exclusively for you, or provide you with 173 | facilities for running those works, provided that you comply with the 174 | terms of this License in conveying all material for which you do not 175 | control copyright. Those thus making or running the covered works for 176 | you must do so exclusively on your behalf, under your direction and 177 | control, on terms that prohibit them from making any copies of your 178 | copyrighted material outside their relationship with you. 179 | 180 | Conveying under any other circumstances is permitted solely under the 181 | conditions stated below. Sublicensing is not allowed; section 10 makes 182 | it unnecessary. 183 | 184 | #### 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 185 | 186 | No covered work shall be deemed part of an effective technological 187 | measure under any applicable law fulfilling obligations under article 188 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 189 | similar laws prohibiting or restricting circumvention of such 190 | measures. 191 | 192 | When you convey a covered work, you waive any legal power to forbid 193 | circumvention of technological measures to the extent such 194 | circumvention is effected by exercising rights under this License with 195 | respect to the covered work, and you disclaim any intention to limit 196 | operation or modification of the work as a means of enforcing, against 197 | the work's users, your or third parties' legal rights to forbid 198 | circumvention of technological measures. 199 | 200 | #### 4. Conveying Verbatim Copies. 
201 | 202 | You may convey verbatim copies of the Program's source code as you 203 | receive it, in any medium, provided that you conspicuously and 204 | appropriately publish on each copy an appropriate copyright notice; 205 | keep intact all notices stating that this License and any 206 | non-permissive terms added in accord with section 7 apply to the code; 207 | keep intact all notices of the absence of any warranty; and give all 208 | recipients a copy of this License along with the Program. 209 | 210 | You may charge any price or no price for each copy that you convey, 211 | and you may offer support or warranty protection for a fee. 212 | 213 | #### 5. Conveying Modified Source Versions. 214 | 215 | You may convey a work based on the Program, or the modifications to 216 | produce it from the Program, in the form of source code under the 217 | terms of section 4, provided that you also meet all of these 218 | conditions: 219 | 220 | - a) The work must carry prominent notices stating that you modified 221 | it, and giving a relevant date. 222 | - b) The work must carry prominent notices stating that it is 223 | released under this License and any conditions added under 224 | section 7. This requirement modifies the requirement in section 4 225 | to "keep intact all notices". 226 | - c) You must license the entire work, as a whole, under this 227 | License to anyone who comes into possession of a copy. This 228 | License will therefore apply, along with any applicable section 7 229 | additional terms, to the whole of the work, and all its parts, 230 | regardless of how they are packaged. This License gives no 231 | permission to license the work in any other way, but it does not 232 | invalidate such permission if you have separately received it. 
233 | - d) If the work has interactive user interfaces, each must display 234 | Appropriate Legal Notices; however, if the Program has interactive 235 | interfaces that do not display Appropriate Legal Notices, your 236 | work need not make them do so. 237 | 238 | A compilation of a covered work with other separate and independent 239 | works, which are not by their nature extensions of the covered work, 240 | and which are not combined with it such as to form a larger program, 241 | in or on a volume of a storage or distribution medium, is called an 242 | "aggregate" if the compilation and its resulting copyright are not 243 | used to limit the access or legal rights of the compilation's users 244 | beyond what the individual works permit. Inclusion of a covered work 245 | in an aggregate does not cause this License to apply to the other 246 | parts of the aggregate. 247 | 248 | #### 6. Conveying Non-Source Forms. 249 | 250 | You may convey a covered work in object code form under the terms of 251 | sections 4 and 5, provided that you also convey the machine-readable 252 | Corresponding Source under the terms of this License, in one of these 253 | ways: 254 | 255 | - a) Convey the object code in, or embodied in, a physical product 256 | (including a physical distribution medium), accompanied by the 257 | Corresponding Source fixed on a durable physical medium 258 | customarily used for software interchange. 
259 | - b) Convey the object code in, or embodied in, a physical product 260 | (including a physical distribution medium), accompanied by a 261 | written offer, valid for at least three years and valid for as 262 | long as you offer spare parts or customer support for that product 263 | model, to give anyone who possesses the object code either (1) a 264 | copy of the Corresponding Source for all the software in the 265 | product that is covered by this License, on a durable physical 266 | medium customarily used for software interchange, for a price no 267 | more than your reasonable cost of physically performing this 268 | conveying of source, or (2) access to copy the Corresponding 269 | Source from a network server at no charge. 270 | - c) Convey individual copies of the object code with a copy of the 271 | written offer to provide the Corresponding Source. This 272 | alternative is allowed only occasionally and noncommercially, and 273 | only if you received the object code with such an offer, in accord 274 | with subsection 6b. 275 | - d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | - e) Convey the object code using peer-to-peer transmission, 288 | provided you inform other peers where the object code and 289 | Corresponding Source of the work are being offered to the general 290 | public at no charge under subsection 6d. 291 | 292 | A separable portion of the object code, whose source code is excluded 293 | from the Corresponding Source as a System Library, need not be 294 | included in conveying the object code work. 295 | 296 | A "User Product" is either (1) a "consumer product", which means any 297 | tangible personal property which is normally used for personal, 298 | family, or household purposes, or (2) anything designed or sold for 299 | incorporation into a dwelling. In determining whether a product is a 300 | consumer product, doubtful cases shall be resolved in favor of 301 | coverage. For a particular product received by a particular user, 302 | "normally used" refers to a typical or common use of that class of 303 | product, regardless of the status of the particular user or of the way 304 | in which the particular user actually uses, or expects or is expected 305 | to use, the product. A product is a consumer product regardless of 306 | whether the product has substantial commercial, industrial or 307 | non-consumer uses, unless such uses represent the only significant 308 | mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to 312 | install and execute modified versions of a covered work in that User 313 | Product from a modified version of its Corresponding Source. The 314 | information must suffice to ensure that the continued functioning of 315 | the modified object code is in no case prevented or interfered with 316 | solely because modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or 331 | updates for a work that has been modified or installed by the 332 | recipient, or for the User Product in which it has been modified or 333 | installed. Access to a network may be denied when the modification 334 | itself materially and adversely affects the operation of the network 335 | or violates the rules and protocols for communication across the 336 | network. 337 | 338 | Corresponding Source conveyed, and Installation Information provided, 339 | in accord with this section must be in a format that is publicly 340 | documented (and with an implementation available to the public in 341 | source code form), and must require no special password or key for 342 | unpacking, reading or copying. 343 | 344 | #### 7. Additional Terms. 345 | 346 | "Additional permissions" are terms that supplement the terms of this 347 | License by making exceptions from one or more of its conditions. 348 | Additional permissions that are applicable to the entire Program shall 349 | be treated as though they were included in this License, to the extent 350 | that they are valid under applicable law. 
If additional permissions 351 | apply only to part of the Program, that part may be used separately 352 | under those permissions, but the entire Program remains governed by 353 | this License without regard to the additional permissions. 354 | 355 | When you convey a copy of a covered work, you may at your option 356 | remove any additional permissions from that copy, or from any part of 357 | it. (Additional permissions may be written to require their own 358 | removal in certain cases when you modify the work.) You may place 359 | additional permissions on material, added by you to a covered work, 360 | for which you have or can give appropriate copyright permission. 361 | 362 | Notwithstanding any other provision of this License, for material you 363 | add to a covered work, you may (if authorized by the copyright holders 364 | of that material) supplement the terms of this License with terms: 365 | 366 | - a) Disclaiming warranty or limiting liability differently from the 367 | terms of sections 15 and 16 of this License; or 368 | - b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | - c) Prohibiting misrepresentation of the origin of that material, 372 | or requiring that modified versions of such material be marked in 373 | reasonable ways as different from the original version; or 374 | - d) Limiting the use for publicity purposes of names of licensors 375 | or authors of the material; or 376 | - e) Declining to grant rights under trademark law for use of some 377 | trade names, trademarks, or service marks; or 378 | - f) Requiring indemnification of licensors and authors of that 379 | material by anyone who conveys the material (or modified versions 380 | of it) with contractual assumptions of liability to the recipient, 381 | for any liability that these contractual assumptions directly 382 | impose on those 
licensors and authors. 383 | 384 | All other non-permissive additional terms are considered "further 385 | restrictions" within the meaning of section 10. If the Program as you 386 | received it, or any part of it, contains a notice stating that it is 387 | governed by this License along with a term that is a further 388 | restriction, you may remove that term. If a license document contains 389 | a further restriction but permits relicensing or conveying under this 390 | License, you may add to a covered work material governed by the terms 391 | of that license document, provided that the further restriction does 392 | not survive such relicensing or conveying. 393 | 394 | If you add terms to a covered work in accord with this section, you 395 | must place, in the relevant source files, a statement of the 396 | additional terms that apply to those files, or a notice indicating 397 | where to find the applicable terms. 398 | 399 | Additional terms, permissive or non-permissive, may be stated in the 400 | form of a separately written license, or stated as exceptions; the 401 | above requirements apply either way. 402 | 403 | #### 8. Termination. 404 | 405 | You may not propagate or modify a covered work except as expressly 406 | provided under this License. Any attempt otherwise to propagate or 407 | modify it is void, and will automatically terminate your rights under 408 | this License (including any patent licenses granted under the third 409 | paragraph of section 11). 410 | 411 | However, if you cease all violation of this License, then your license 412 | from a particular copyright holder is reinstated (a) provisionally, 413 | unless and until the copyright holder explicitly and finally 414 | terminates your license, and (b) permanently, if the copyright holder 415 | fails to notify you of the violation by some reasonable means prior to 416 | 60 days after the cessation. 
417 | 418 | Moreover, your license from a particular copyright holder is 419 | reinstated permanently if the copyright holder notifies you of the 420 | violation by some reasonable means, this is the first time you have 421 | received notice of violation of this License (for any work) from that 422 | copyright holder, and you cure the violation prior to 30 days after 423 | your receipt of the notice. 424 | 425 | Termination of your rights under this section does not terminate the 426 | licenses of parties who have received copies or rights from you under 427 | this License. If your rights have been terminated and not permanently 428 | reinstated, you do not qualify to receive new licenses for the same 429 | material under section 10. 430 | 431 | #### 9. Acceptance Not Required for Having Copies. 432 | 433 | You are not required to accept this License in order to receive or run 434 | a copy of the Program. Ancillary propagation of a covered work 435 | occurring solely as a consequence of using peer-to-peer transmission 436 | to receive a copy likewise does not require acceptance. However, 437 | nothing other than this License grants you permission to propagate or 438 | modify any covered work. These actions infringe copyright if you do 439 | not accept this License. Therefore, by modifying or propagating a 440 | covered work, you indicate your acceptance of this License to do so. 441 | 442 | #### 10. Automatic Licensing of Downstream Recipients. 443 | 444 | Each time you convey a covered work, the recipient automatically 445 | receives a license from the original licensors, to run, modify and 446 | propagate that work, subject to this License. You are not responsible 447 | for enforcing compliance by third parties with this License. 448 | 449 | An "entity transaction" is a transaction transferring control of an 450 | organization, or substantially all assets of one, or subdividing an 451 | organization, or merging organizations. 
If propagation of a covered 452 | work results from an entity transaction, each party to that 453 | transaction who receives a copy of the work also receives whatever 454 | licenses to the work the party's predecessor in interest had or could 455 | give under the previous paragraph, plus a right to possession of the 456 | Corresponding Source of the work from the predecessor in interest, if 457 | the predecessor has it or can get it with reasonable efforts. 458 | 459 | You may not impose any further restrictions on the exercise of the 460 | rights granted or affirmed under this License. For example, you may 461 | not impose a license fee, royalty, or other charge for exercise of 462 | rights granted under this License, and you may not initiate litigation 463 | (including a cross-claim or counterclaim in a lawsuit) alleging that 464 | any patent claim is infringed by making, using, selling, offering for 465 | sale, or importing the Program or any portion of it. 466 | 467 | #### 11. Patents. 468 | 469 | A "contributor" is a copyright holder who authorizes use under this 470 | License of the Program or a work on which the Program is based. The 471 | work thus licensed is called the contributor's "contributor version". 472 | 473 | A contributor's "essential patent claims" are all patent claims owned 474 | or controlled by the contributor, whether already acquired or 475 | hereafter acquired, that would be infringed by some manner, permitted 476 | by this License, of making, using, or selling its contributor version, 477 | but do not include claims that would be infringed only as a 478 | consequence of further modification of the contributor version. For 479 | purposes of this definition, "control" includes the right to grant 480 | patent sublicenses in a manner consistent with the requirements of 481 | this License. 
482 | 483 | Each contributor grants you a non-exclusive, worldwide, royalty-free 484 | patent license under the contributor's essential patent claims, to 485 | make, use, sell, offer for sale, import and otherwise run, modify and 486 | propagate the contents of its contributor version. 487 | 488 | In the following three paragraphs, a "patent license" is any express 489 | agreement or commitment, however denominated, not to enforce a patent 490 | (such as an express permission to practice a patent or covenant not to 491 | sue for patent infringement). To "grant" such a patent license to a 492 | party means to make such an agreement or commitment not to enforce a 493 | patent against the party. 494 | 495 | If you convey a covered work, knowingly relying on a patent license, 496 | and the Corresponding Source of the work is not available for anyone 497 | to copy, free of charge and under the terms of this License, through a 498 | publicly available network server or other readily accessible means, 499 | then you must either (1) cause the Corresponding Source to be so 500 | available, or (2) arrange to deprive yourself of the benefit of the 501 | patent license for this particular work, or (3) arrange, in a manner 502 | consistent with the requirements of this License, to extend the patent 503 | license to downstream recipients. "Knowingly relying" means you have 504 | actual knowledge that, but for the patent license, your conveying the 505 | covered work in a country, or your recipient's use of the covered work 506 | in a country, would infringe one or more identifiable patents in that 507 | country that you have reason to believe are valid. 
508 | 509 | If, pursuant to or in connection with a single transaction or 510 | arrangement, you convey, or propagate by procuring conveyance of, a 511 | covered work, and grant a patent license to some of the parties 512 | receiving the covered work authorizing them to use, propagate, modify 513 | or convey a specific copy of the covered work, then the patent license 514 | you grant is automatically extended to all recipients of the covered 515 | work and works based on it. 516 | 517 | A patent license is "discriminatory" if it does not include within the 518 | scope of its coverage, prohibits the exercise of, or is conditioned on 519 | the non-exercise of one or more of the rights that are specifically 520 | granted under this License. You may not convey a covered work if you 521 | are a party to an arrangement with a third party that is in the 522 | business of distributing software, under which you make payment to the 523 | third party based on the extent of your activity of conveying the 524 | work, and under which the third party grants, to any of the parties 525 | who would receive the covered work from you, a discriminatory patent 526 | license (a) in connection with copies of the covered work conveyed by 527 | you (or copies made from those copies), or (b) primarily for and in 528 | connection with specific products or compilations that contain the 529 | covered work, unless you entered into that arrangement, or that patent 530 | license was granted, prior to 28 March 2007. 531 | 532 | Nothing in this License shall be construed as excluding or limiting 533 | any implied license or other defenses to infringement that may 534 | otherwise be available to you under applicable patent law. 535 | 536 | #### 12. No Surrender of Others' Freedom. 537 | 538 | If conditions are imposed on you (whether by court order, agreement or 539 | otherwise) that contradict the conditions of this License, they do not 540 | excuse you from the conditions of this License. 
If you cannot convey a 541 | covered work so as to satisfy simultaneously your obligations under 542 | this License and any other pertinent obligations, then as a 543 | consequence you may not convey it at all. For example, if you agree to 544 | terms that obligate you to collect a royalty for further conveying 545 | from those to whom you convey the Program, the only way you could 546 | satisfy both those terms and this License would be to refrain entirely 547 | from conveying the Program. 548 | 549 | #### 13. Use with the GNU Affero General Public License. 550 | 551 | Notwithstanding any other provision of this License, you have 552 | permission to link or combine any covered work with a work licensed 553 | under version 3 of the GNU Affero General Public License into a single 554 | combined work, and to convey the resulting work. The terms of this 555 | License will continue to apply to the part which is the covered work, 556 | but the special requirements of the GNU Affero General Public License, 557 | section 13, concerning interaction through a network will apply to the 558 | combination as such. 559 | 560 | #### 14. Revised Versions of this License. 561 | 562 | The Free Software Foundation may publish revised and/or new versions 563 | of the GNU General Public License from time to time. Such new versions 564 | will be similar in spirit to the present version, but may differ in 565 | detail to address new problems or concerns. 566 | 567 | Each version is given a distinguishing version number. If the Program 568 | specifies that a certain numbered version of the GNU General Public 569 | License "or any later version" applies to it, you have the option of 570 | following the terms and conditions either of that numbered version or 571 | of any later version published by the Free Software Foundation. 
If the 572 | Program does not specify a version number of the GNU General Public 573 | License, you may choose any version ever published by the Free 574 | Software Foundation. 575 | 576 | If the Program specifies that a proxy can decide which future versions 577 | of the GNU General Public License can be used, that proxy's public 578 | statement of acceptance of a version permanently authorizes you to 579 | choose that version for the Program. 580 | 581 | Later license versions may give you additional or different 582 | permissions. However, no additional obligations are imposed on any 583 | author or copyright holder as a result of your choosing to follow a 584 | later version. 585 | 586 | #### 15. Disclaimer of Warranty. 587 | 588 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 589 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 590 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT 591 | WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT 592 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 593 | A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND 594 | PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE 595 | DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR 596 | CORRECTION. 597 | 598 | #### 16. Limitation of Liability. 
599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR 602 | CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 603 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES 604 | ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT 605 | NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR 606 | LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM 607 | TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER 608 | PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 609 | 610 | #### 17. Interpretation of Sections 15 and 16. 611 | 612 | If the disclaimer of warranty and limitation of liability provided 613 | above cannot be given local legal effect according to their terms, 614 | reviewing courts shall apply local law that most closely approximates 615 | an absolute waiver of all civil liability in connection with the 616 | Program, unless a warranty or assumption of liability accompanies a 617 | copy of the Program in return for a fee. 618 | 619 | END OF TERMS AND CONDITIONS 620 | 621 | ### How to Apply These Terms to Your New Programs 622 | 623 | If you develop a new program, and you want it to be of the greatest 624 | possible use to the public, the best way to achieve this is to make it 625 | free software which everyone can redistribute and change under these 626 | terms. 627 | 628 | To do so, attach the following notices to the program. It is safest to 629 | attach them to the start of each source file to most effectively state 630 | the exclusion of warranty; and each file should have at least the 631 | "copyright" line and a pointer to where the full notice is found. 
632 | 633 | <one line to give the program's name and a brief idea of what it does.> 634 | Copyright (C) <year> <name of author> 635 | 636 | This program is free software: you can redistribute it and/or modify 637 | it under the terms of the GNU General Public License as published by 638 | the Free Software Foundation, either version 3 of the License, or 639 | (at your option) any later version. 640 | 641 | This program is distributed in the hope that it will be useful, 642 | but WITHOUT ANY WARRANTY; without even the implied warranty of 643 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 644 | GNU General Public License for more details. 645 | 646 | You should have received a copy of the GNU General Public License 647 | along with this program. If not, see <https://www.gnu.org/licenses/>. 648 | 649 | Also add information on how to contact you by electronic and paper 650 | mail. 651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands \`show w' and \`show c' should show the 661 | appropriate parts of the General Public License. Of course, your 662 | program's commands might be different; for a GUI interface, you would 663 | use an "about box". 664 | 665 | You should also get your employer (if you work as a programmer) or 666 | school, if any, to sign a "copyright disclaimer" for the program, if 667 | necessary. For more information on this, and how to apply and follow 668 | the GNU GPL, see <https://www.gnu.org/licenses/>. 669 | 670 | The GNU General Public License does not permit incorporating your 671 | program into proprietary programs. If your program is a subroutine 672 | library, you may consider it more useful to permit linking proprietary 673 | applications with the library.
If this is what you want to do, use the 674 | GNU Lesser General Public License instead of this License. But first, 675 | please read <https://www.gnu.org/licenses/why-not-lgpl.html>. 676 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # PatternOmatic Makefile 3 | # 4 | # This file is part of PatternOmatic. 5 | # 6 | # Copyright © 2020 Miguel Revuelta Espinosa 7 | # 8 | # PatternOmatic is free software: you can redistribute it and/or 9 | # modify it under the terms of the GNU Lesser General Public License 10 | # as published by the Free Software Foundation, either version 3 of 11 | # the License, or (at your option) any later version. 12 | # 13 | # PatternOmatic is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU Lesser General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU Lesser General Public License 19 | # along with PatternOmatic. If not, see <https://www.gnu.org/licenses/>. 20 | # 21 | export PYTHONPATH=.
# Every target below names a command, not a file to be produced. Declaring them
# phony keeps e.g. `make build` and `make venv` running even though directories
# called `build/` and `venv/` exist in the working tree (otherwise make reports
# the target as "up to date" and silently does nothing).
.PHONY: all venv clean libs test coverage sonar sonarcloud build publish run

# Full local pipeline: install dependencies, test with coverage, package, analyse.
all: libs coverage clean build sonar

# NOTE: every recipe line runs in its own sub-shell, so this activation does not
# persist into the caller's shell nor into other targets. `.` is used instead of
# the bash-only `source` because make runs recipes with /bin/sh by default.
venv:
	. venv/bin/activate

# Remove packaging and profiling artefacts.
clean:
	rm -rf `pwd`/build
	rm -rf `pwd`/dist
	rm -rf `pwd`/PatternOmatic.egg-info
	rm -rf `pwd`/fil-result

# Install the project's dependencies.
libs:
	pip install -r requirements.txt

# Run the unit test suite.
test:
	python -m unittest

# Run the unit tests under coverage, print the report and emit coverage.xml.
coverage:
	coverage run --branch --source=PatternOmatic,scripts,tests --omit=*__init__* -m unittest && \
	coverage report --ignore-errors --omit=venv/**,tests/**,*__init__* && \
	coverage xml

# Static analysis against the `pOm` project key.
sonar:
	sonar-scanner -Dsonar.projectKey=pOm -Dsonar.exclusions=tests/**

# Static analysis against the `revuel_PatternOmatic` project key.
sonarcloud:
	sonar-scanner -Dsonar.projectKey=revuel_PatternOmatic

# Build source and wheel distributions into dist/.
build:
	python setup.py sdist bdist_wheel

# Upload everything in dist/ to PyPI; PYPI_TOKEN must be set in the environment.
publish:
	twine upload -u __token__ -p ${PYPI_TOKEN} --repository-url https://upload.pypi.org/legacy/ dist/*

# Example command line execution.
run:
	python ./scripts/patternomatic.py -s Hello Mr. Puffin -s Goodbye Mrs. Muffin
import time
import pkg_resources
from typing import List, Union, Tuple, Any
from spacy import load as spacy_load
from spacy.cli import download as spacy_download

from PatternOmatic.ge.population import Population
from PatternOmatic.ge.stats import Stats
from PatternOmatic.settings.config import Config
from PatternOmatic.settings.log import LOG
from PatternOmatic.nlp.bnf import dynamic_generator as dgg


def find_patterns(
        samples: List[str],
        configuration: Union[str, None] = None,
        spacy_language_model_name: Union[str, None] = None) -> List[Tuple[Any, ...]]:
    """
    Given some samples, this function finds optimized patterns to be used by the Spacy's Rule Based Matcher.
    Args:
        samples: List of strings from where to find common linguistic patterns
        configuration: (str) Optional configuration file path to be loaded (Fallbacks to default configuration)
        spacy_language_model_name: (str) Optional valid Spacy Language Model (Fallbacks to Spacy's en_core_web_sm)

    Returns: A list holding two tuples: the patterns found (best first) and, in the same order, each pattern's
        fitness value against the samples. The list is empty if no individual was accumulated.

    """
    default_model_name = 'en_core_web_sm'
    # Resolve the default up front, so the "no model supplied" case does not have to go through a
    # failed load of ``None`` (and a misleading "Model None not found" warning) to reach the fallback
    model_name = spacy_language_model_name or default_model_name

    LOG.info(f'Loading language model {model_name}...')
    # Distribution names use dashes whereas the importable model name uses underscores
    if 'en-core-web-sm' not in [d.project_name for d in pkg_resources.working_set]:
        LOG.info('PatternOmatic\'s default spaCy\'s Language Model not installed,'
                 ' proceeding to install en_core_web_sm, please wait...')
        spacy_download(default_model_name)

    try:
        nlp = spacy_load(model_name)
    except OSError:
        LOG.warning(f'Model {model_name} not found, '
                    f'falling back to patternOmatic\'s default language model: en_core_web_sm')
        nlp = spacy_load(default_model_name)

    LOG.info('Building Doc instances...')
    docs = [nlp(sample) for sample in samples]

    if isinstance(configuration, str):
        LOG.info(f'Setting up configuration from the following path: {configuration}...')
        config = Config(config_file_path=configuration)
    else:
        config = Config()
        LOG.info(f'Existing Config instance found: {config}')

    stats = Stats()

    # The grammar is generated once from the samples and shared by every run
    bnf_g = dgg(docs)

    LOG.info('Starting Execution...')
    for _ in range(config.max_runs):
        start = time.monotonic()
        Population(docs, bnf_g, stats).evolve()
        stats.add_time(time.monotonic() - start)
        stats.calculate_metrics()

    LOG.info(f'Execution report {stats}')
    stats.persist()

    LOG.info('Best individuals for this execution:')
    stats.most_fitted_accumulator.sort(key=lambda i: i.fitness_value, reverse=True)
    for individual in stats.most_fitted_accumulator:
        LOG.info(f'{individual}')

    # Transpose [(pattern, score), ...] into [(pattern, ...), (score, ...)]
    return list(zip(*[(i.fenotype, i.fitness_value) for i in stats.most_fitted_accumulator]))
import re
import json

from random import random
from itertools import cycle
from typing import List
from spacy.tokens import Doc
from spacy.matcher import Matcher

from PatternOmatic.ge.stats import Stats
from PatternOmatic.settings.config import Config
from PatternOmatic.settings.log import LOG
from PatternOmatic.settings.literals import FitnessType, S, T, XPS, TOKEN_WILDCARD, UNDERSCORE, P, F, EF, IN, NOT_IN, \
    SLD, SRD, GTH, LTH, GEQ, LEQ, EQQ, XPS_AS


class Fitness(object):
    """ Dispatches the proper fitness type for individual instances """
    __slots__ = ('_fitness', 'config', 'samples', 'fenotype')

    def __init__(self, config, samples, fenotype):
        """
        Stores the evaluation context and binds the fitness function selected by the configuration
        Args:
            config: Configuration object; its fitness_function_type and use_token_wildcard attributes are read
            samples: List of Spacy doc objects the pattern is evaluated against
            fenotype: Spacy's Rule Based Matcher pattern (list of token dictionaries)
        """
        self.config = config
        self.samples = samples
        self.fenotype = fenotype
        self._dispatch_fitness(self.config.fitness_function_type)

    def __call__(self, *args, **kwargs) -> float:
        """ Evaluates the bound fitness function. Arguments are accepted but ignored """
        return self._fitness()

    def _dispatch_fitness(self, fitness_function_type: FitnessType) -> None:
        """
        Sets the type of the fitness function for an Individual instance
        Args:
            fitness_function_type: The fitness function to be used (anything but FULL_MATCH selects the basic one)

        Returns: None

        """
        if fitness_function_type == FitnessType.FULL_MATCH:
            self._fitness = self._fitness_full_match
        else:
            self._fitness = self._fitness_basic

    def _fitness_basic(self) -> float:
        """
        Sets the fitness value for an individual. If it makes a partial match over a sample, a score is added
        for that sample even if the matches are only a portion of the sample's length
        Returns: Float (fitness value)

        """
        max_score_per_sample = 1 / len(self.samples)
        matcher = Matcher(self.samples[0].vocab)
        matcher.add(repr(FitnessType.BASIC), None, self.fenotype)
        contact = 0.0

        for sample in self.samples:
            # Any match at all, wherever it is located, scores the whole sample
            if len(matcher(sample)) > 0:
                contact += max_score_per_sample

        return self._wildcard_penalty(contact)

    def _fitness_full_match(self) -> float:
        """
        Sets the fitness value for an individual. It only gives a partial score if any of the matches equals the full
        length of the sample
        Returns: Float

        """
        max_score_per_sample = 1 / len(self.samples)
        matcher = Matcher(self.samples[0].vocab)
        matcher.add(repr(FitnessType.FULL_MATCH), None, self.fenotype)
        contact = 0.0

        for sample in self.samples:
            # Each match is a (match id, start token, end token) triple
            for _, start, end in matcher(sample):
                if start == 0 and end == len(sample):
                    contact += max_score_per_sample

        return self._wildcard_penalty(contact)

    def _wildcard_penalty(self, contact: float) -> float:
        """
        Applies a penalty for the usage of token wildcard if usage of token wildcard is enabled
        Args:
            contact: Temporary fitness value for the current individual

        Returns: Final fitness value for the current individual

        """
        if self.config.use_token_wildcard:
            num_tokens = len(self.fenotype)
            for item in self.fenotype:
                # An empty token dictionary is the token wildcard
                if item == {}:
                    LOG.debug('Applying token wildcard penalty!')
                    contact -= 1 / num_tokens

        return contact


class Individual(object):
    """ Individual implementation of an AI Grammatical Evolution algorithm in OOP fashion """
    __slots__ = ('config', 'samples', 'grammar', 'stats', 'bin_genotype', 'int_genotype', 'fenotype', 'fitness_value')

    def __init__(self, samples: List[Doc], grammar: dict, stats: Stats, dna: str = None):
        """
        Individual constructor, if dna is not supplied, sets up randomly its binary genotype
        Args:
            samples: list of Spacy doc objects
            grammar: Backus Naur Form grammar notation encoded in a dictionary
            stats (Stats): statistics object related with this run
            dna: Optional, binary string representation (it is mutated before being used)
        """
        self.config = Config()

        self.samples = samples
        self.grammar = grammar
        self.stats = stats
        self.bin_genotype = self._initialize() if dna is None else self.mutate(dna, self.config.mutation_probability)
        self.int_genotype = self._transcription()
        self.fenotype = self._translation()
        self.fitness_value = Fitness(self.config, self.samples, self.fenotype)()

        # Stats concerns
        self._is_solution()

    @property
    def __dict__(self):
        """ Dictionary representation for a slotted class (that has no dict at all) """
        # Only the attributes worth reporting are exposed
        return {s: getattr(self, s, None) for s in self.__slots__ if s in ('bin_genotype', 'fenotype', 'fitness_value')}

    def __repr__(self):
        """ String representation of a slotted class using hijacked dict """
        return f'{self.__class__.__name__}({self.__dict__})'

    #
    # Problem specific GE methods
    #
    def _initialize(self) -> str:
        """
        Sets up randomly the binary string representation of an individual
        Returns: String, binary fashion

        """
        return ''.join('1' if random() > 0.5 else '0' for _ in range(self.config.dna_length))

    def _transcription(self) -> List[int]:
        """
        Converts a binary string representation to an integer representation codon by codon
        Returns: List of integers

        """
        # NOTE(review): the window is ``codon_length - 1`` bits wide although slice ends are already exclusive.
        # Kept untouched because changing it would alter how every genotype decodes; confirm it is intended.
        width = self.config.codon_length - 1
        return [int(self.bin_genotype[i:i + width], 2) for i in range(0, len(self.bin_genotype), width)]

    def _translation(self):
        """
        Expands the grammar's root symbol into a Spacy's Rule Based Matcher pattern, consuming the integer
        genotype circularly until a whole pass over the grammar changes nothing
        Returns: List of token dictionaries (the pattern decoded from its JSON representation)

        """
        symbolic_string = self.grammar[S][0]  # Root
        circular = cycle(self.int_genotype)

        while True:
            previous_symbolic_string = symbolic_string
            ci = next(circular)

            for key in self.grammar:
                symbolic_string = self._translate(ci, key, symbolic_string)

            # Nothing was rewritten with this codon: the expansion is finished
            if previous_symbolic_string == symbolic_string:
                break

        return json.loads('[' + symbolic_string + ']')

    def _translate(self, ci: int, key, symbolic_string: str):
        """
        Helper method to reduce cognitive overload of _translation: rewrites the first occurrence of one grammar
        symbol using the production chosen by the current codon
        Args:
            ci: Current codon (integer) taken from the circular genotype
            key: Grammar symbol to be rewritten
            symbolic_string: String representation of the individual's Spacy's Rule Based Matcher pattern

        Returns: String representation of the individual's Spacy's Rule Based Matcher pattern

        """
        # The codon selects one production of this symbol, wrapping around the available ones
        fired_rule = self.grammar[key][ci % len(self.grammar[key])]

        if key in (T, XPS):
            if fired_rule == TOKEN_WILDCARD:
                replacement = '{}'
            else:
                replacement = '{' + str(fired_rule) + '}'

        elif key == UNDERSCORE:
            replacement = '"_": {' + str(fired_rule) + '}'

        elif key in (P, F, EF):
            replacement = str(fired_rule)

        elif key in (IN, NOT_IN):
            key_r = key.replace(SLD, '').replace(SRD, '')
            # Python list representation to JSON: single quotes become double quotes
            replacement = '"' + key_r + '":' + str(fired_rule).replace('\'', '"')

        elif key in (GTH, LTH, GEQ, LEQ, EQQ):
            replacement = '"' + XPS_AS[key] + '":' + str(fired_rule)

        else:
            key_r = key.replace(SLD, '').replace(SRD, '')
            fired_rule = str(fired_rule)
            if fired_rule != XPS:
                replacement = '"' + key_r + '":"' + fired_rule + '"'
            else:
                # A nested extended pattern symbol is left unquoted so that it can be expanded later
                replacement = '"' + key_r + '":' + fired_rule

        return re.sub(key, replacement, symbolic_string, 1)

    #
    # Generic GA methods
    #
    @classmethod
    def mutate(cls, dna, mutation_probability) -> str:
        """
        Mutates a given dna string by a mutation probability
        Args:
            dna: binary string representation of a dna sequence
            mutation_probability: Chances of each gen to be mutated

        Returns: Binary string

        """
        # One random draw per gen; a mutated gen is flipped ('1' becomes '0', anything else becomes '1')
        return ''.join(('0' if gen == '1' else '1') if random() < mutation_probability else gen for gen in dna)

    #
    # Stats concerns
    #
    def _is_solution(self) -> None:
        """
        Method to manage AES for the given RUN: while no solution has been found, every new individual increases
        the counter, and the first one reaching the success threshold marks the run as solved

        """
        if self.stats.solution_found is False:
            self.stats.sum_aes(1)
            if self.fitness_value >= self.config.success_threshold:
                LOG.debug('Solution found for this run!')
                self.stats.solution_found = True
class Selection(object):
    """ Dispatches the proper selection type for population instances """
    __slots__ = '_select'

    def __init__(self, selection_type: SelectionType):
        """
        Selection constructor
        Args:
            selection_type: SelectionType Enum, any other value falls back to binary tournament
        """
        self.__dispatch_selection(selection_type)

    def __call__(self, generation: List[Individual]) -> List[Individual]:
        """
        Performs a selection operation for the population
        Args:
            generation: A list of Individual instances

        Returns: A list of Individual instances

        """
        LOG.debug('Selecting individuals...')
        return self._select(generation)

    def __dispatch_selection(self, selection_type: SelectionType) -> None:
        """
        Sets the type of the selection operation for the current evolution
        Args:
            selection_type: SelectionType Enum

        Returns: None

        """
        use_k_tournament = \
            isinstance(selection_type, SelectionType) and selection_type == SelectionType.K_TOURNAMENT
        self._select = self._k_tournament if use_k_tournament else self._binary_tournament

    @staticmethod
    def _binary_tournament(generation: List[Individual]) -> List[Individual]:
        """
        Selects members of the current generation into the mating pool in order to produce offspring by comparing
        pairs of distinct Individuals and adding the best of each pair to the "mating pool" until its filled.
        The fill condition is inclusive, so the pool holds len(generation) + 1 individuals

        Args:
            generation: A list of Individual instances

        Raises: ValueError if the generation is empty
        Returns: A list of Individual instances

        """
        size = len(generation)

        if size == 0:
            raise ValueError('Can not run a binary tournament over an empty generation')

        if size == 1:
            # There is no distinct rival to draw: looking for one would never finish
            return [generation[0]] * 2

        mating_pool = []

        while len(mating_pool) <= size:
            i = random.randint(0, size - 1)
            j = i

            # Redraw until the rival is a different position
            while j == i:
                j = random.randint(0, size - 1)

            first, second = generation[i], generation[j]
            mating_pool.append(first if first.fitness_value >= second.fitness_value else second)

        return mating_pool

    @staticmethod
    def _k_tournament(generation: List[Individual]) -> List[Individual]:
        """
        Not implemented
        Args:
            generation: A list of Individual instances

        Raises: NotImplementedError
        Returns: A list of Individual instances

        """
        # TODO(me): k tournament
        raise NotImplementedError


class Recombination(object):
    """ Dispatches the proper recombination type for population instances """
    __slots__ = ('_recombine', 'config', 'grammar', 'samples', 'stats')

    def __init__(self, grammar: Dict, samples: List[Doc], stats: Stats):
        """
        Recombination constructor
        Args:
            grammar: Backus Naur Form grammar notation encoded in a dictionary
            samples: list of Spacy doc objects
            stats: Stats instance handed over to every child created
        """
        self._recombine = None
        self.config = Config()
        self.grammar = grammar
        self.samples = samples
        self.stats = stats
        self.__dispatch_recombination_type()

    def __call__(self, mating_pool: List[Individual], generation: List[Individual]) -> List[Individual]:
        """
        Performs a recombination operation for the population
        Args:
            mating_pool: A list of Individual instances
            generation: A list of Individual instances

        Returns: A list of Individual instances

        """
        LOG.debug('Combining individuals...')
        return self._recombine(mating_pool, generation)

    def __dispatch_recombination_type(self) -> None:
        """
        Sets the type of the recombination operation for the current evolution

        Returns: None

        """
        self._recombine = self._random_one_point_crossover

    def _random_one_point_crossover(
            self, mating_pool: List[Individual], generation: List[Individual]) -> List[Individual]:
        """
        Repeatedly draws two parents from the mating pool and, with the configured mating probability, recombines
        them to produce two offsprings. Puts them all into the offspring
        Args:
            mating_pool: A list of Individual instances
            generation: A list of Individual instances

        Returns: A list of Individual instances

        """
        offspring = []
        offspring_max_size = round(len(generation) * self.config.offspring_max_size_factor)

        # Children come in pairs and the bound is inclusive, so the result can exceed offspring_max_size
        while len(offspring) <= offspring_max_size:
            parent_1 = random.choice(mating_pool)
            parent_2 = random.choice(mating_pool)

            if random.random() < self.config.mating_probability:
                cut = random.randint(1, self.config.codon_length - 1) * self.config.num_codons_per_individual
                tail_length = self.config.dna_length - cut

                # Each child takes the head of one parent and the tail of the other
                for head_parent, tail_parent in ((parent_1, parent_2), (parent_2, parent_1)):
                    offspring.append(
                        Individual(self.samples, self.grammar, self.stats,
                                   dna=head_parent.bin_genotype[:cut] + tail_parent.bin_genotype[-tail_length:]))

        return offspring


class Replacement(object):
    """ Dispatches the proper replacement type for population instances """
    __slots__ = '_replace'

    def __init__(self, replacement_type: ReplacementType):
        """
        Replacement constructor
        Args:
            replacement_type: ReplacementType Enum, any other value falls back to mu plus lambda
        """
        self.__dispatch_replacement_type(replacement_type)

    def __call__(self, generation: List[Individual], offspring: List[Individual]) \
            -> Tuple[List[Individual], List[Individual]]:
        """
        Performs a replacement operation for the population
        Args:
            generation: A list of Individual instances
            offspring: A list of Individual instances

        Returns: A tuple containing two list of Individual instances

        """
        LOG.debug('Replacing individuals...')
        return self._replace(generation, offspring)

    def __dispatch_replacement_type(self, replacement_type: ReplacementType) -> None:
        """
        Sets the type of the replacement operation for the current evolution
        Args:
            replacement_type: ReplacementType Enum

        Returns: None

        """
        self._replace = self._mu_plus_lambda

        if isinstance(replacement_type, ReplacementType):
            if replacement_type == ReplacementType.MU_LAMBDA_WITH_ELITISM:
                self._replace = self._mu_lambda_elite
            elif replacement_type == ReplacementType.MU_LAMBDA_WITHOUT_ELITISM:
                self._replace = self._mu_lambda_no_elite

    @staticmethod
    def _mu_plus_lambda(generation: List[Individual], offspring: List[Individual]) \
            -> Tuple[List[Individual], List[Individual]]:
        """
        Produces the next generation combining the current generation with the offspring
        Args:
            generation: A list of Individual instances
            offspring: A list of Individual instances

        Returns: A tuple containing two list of Individual instances

        """
        replacement_pool = sorted(generation + offspring, key=lambda i: i.fitness_value, reverse=True)

        return replacement_pool[:len(generation)], []

    @staticmethod
    def _mu_lambda_elite(generation: List[Individual], offspring: List[Individual]) \
            -> Tuple[List[Individual], List[Individual]]:
        """
        Produces the next generation using the offspring and the best Individual of the current generation.
        Both input lists are sorted in place and the generation list itself is updated and returned
        Args:
            generation: A list of Individual instances
            offspring: A list of Individual instances

        Returns: A tuple containing two list of Individual instances

        """
        generation.sort(key=lambda i: i.fitness_value, reverse=True)
        offspring.sort(key=lambda i: i.fitness_value, reverse=True)

        # The elite keeps slot 0, so only len(generation) - 1 offspring fit in: taking len(generation) of them
        # made the population grow by one individual on every replacement
        free_slots = max(len(generation) - 1, 0)
        generation[1:] = offspring[:free_slots]

        return generation, []

    @staticmethod
    def _mu_lambda_no_elite(generation: List[Individual], offspring: List[Individual]) \
            -> Tuple[List[Individual], List[Individual]]:
        """
        Produces the next generation totally replacing the current generation with the offspring.
        The offspring list is sorted in place
        Args:
            generation: A list of Individual instances
            offspring: A list of Individual instances

        Returns: A tuple containing two list of Individual instances

        """
        offspring.sort(key=lambda i: i.fitness_value, reverse=True)

        return offspring[0:len(generation)], []


class Population(object):
    """ Population implementation of an AI Grammatical Evolution algorithm in OOP fashion """
    __slots__ = ('config', 'samples', 'grammar', 'stats', 'generation', 'offspring', 'best_individual',
                 'selection', 'recombination', 'replacement')

    def __init__(self, samples: [Doc], grammar: dict, stats: Stats):
        """
        Population constructor, initializes a list of Individual objects
        Args:
            samples: list of Spacy doc objets
            grammar: Backus Naur Form grammar notation encoded in a dictionary
            stats: Stats instance shared with every Individual of this population
        """
        self.config = Config()

        self.samples = samples
        self.grammar = grammar
        self.stats = stats
        # _genesis reads config, samples, grammar and stats, so it must run after they are set
        self.generation = self._genesis()
        self.offspring = list()
        self.best_individual = None

        self.selection = Selection(self.config.selection_type)
        self.recombination = Recombination(grammar, samples, stats)
        self.replacement = Replacement(self.config.replacement_type)

    #
    # Population specific methods
    #
    def _genesis(self) -> List[Individual]:
        """
        Initializes the first generation, creating as many individuals as config.dna_length states
        Returns: A list of individual objects

        """
        return [Individual(self.samples, self.grammar, self.stats) for _ in range(0, self.config.dna_length)]

    def _best_challenge(self) -> None:
        """
        Compares current generation best fitness individual against previous generation best fitness individual.
        Updates the best individual attribute accordingly. The generation must not be empty
        """
        # Do not trust position 0: after an elitist replacement it holds the previous elite, which may be less
        # fitted than the offspring placed right after it. For sorted generations max() still yields position 0
        challenger = max(self.generation, key=lambda i: i.fitness_value)

        if self.best_individual is None or challenger.fitness_value > self.best_individual.fitness_value:
            self.best_individual = challenger

    #
    # Evolution
    #
    def evolve(self):
        """
        Search Engine:
            1) Selects individuals of the current generation to constitute who will mate
            2) Crossover or recombination of the previously selected individuals
            3) Replace/mix the this generation with the offspring
            4) Save the best individual by fitness
            5) Calculate statistics for this Run
        """

        LOG.info('Evolution taking place, please wait...')

        self.stats.reset()

        for _ in range(self.config.max_generations):
            mating_pool = self.selection(self.generation)
            self.offspring = self.recombination(mating_pool, self.generation)
            self.generation, self.offspring = self.replacement(self.generation, self.offspring)
            self._best_challenge()

        LOG.info(f'Best candidate found on this run: {self.best_individual}')

        # Stats concerns
        self.stats.add_most_fitted(self.best_individual)
        self.stats.add_mbf(self.best_individual.fitness_value)

        # NOTE(review): the _is_solution helper of the individuals flags a solution with ">=" against this same
        # threshold while the success rate uses a strict ">" here: confirm which comparison is intended
        self.stats.add_sr(self.best_individual.fitness_value > self.config.success_threshold)
class Stats(object):
    """ Class responsible of handling performance metrics """
    __slots__ = [
        'config',
        'success_rate_accumulator',
        'mbf_accumulator',
        'aes_accumulator',
        'time_accumulator',
        'most_fitted_accumulator',
        'solution_found',
        'success_rate',
        'mbf',
        'aes',
        'mean_time',
        'aes_counter'
    ]

    def __init__(self):
        """ Stats instances constructor """
        self.config = Config()

        # Per run observations, one entry appended per RUN
        self.success_rate_accumulator = []
        self.mbf_accumulator = []
        self.aes_accumulator = []
        self.time_accumulator = []
        self.most_fitted_accumulator = []

        # Run scoped state, cleared by reset()
        self.solution_found = False
        self.aes_counter = 0

        # Aggregated metrics, filled in by calculate_metrics()
        self.success_rate = None
        self.mbf = None
        self.aes = None
        self.mean_time = None

    @property
    def __dict__(self):
        """ Dictionary representation for a slotted class (that has no dict at all) """
        # Only the aggregated metrics are exposed, in the order they are declared in the slots
        report = {
            metric: getattr(self, metric, None) for metric in ('success_rate', 'mbf', 'aes', 'mean_time')
        }

        champion = self.get_most_fitted()
        report['most_fitted'] = None if champion is None else champion.__dict__

        return report

    def __repr__(self):
        """ String representation of a slotted class using hijacked dict """
        return f'{self.__class__.__name__}({self.__dict__})'

    def __iter__(self):
        """ Enable dict(self) """
        yield from self.__dict__.items()

    #
    # Accumulators & Counters
    #
    def add_sr(self, sr: bool) -> None:
        """
        Adds a new Success Rate value to the accumulator
        Args:
            sr: Boolean value that indicates if the RUN succeeded (True) or not (False)

        """
        self.success_rate_accumulator.append(sr)

    def add_mbf(self, bf: float) -> None:
        """
        Adds a new Best Fitness value to the accumulator
        Args:
            bf: Best fitness found over a RUN

        """
        self.mbf_accumulator.append(bf)

    def add_aes(self, es: int) -> None:
        """
        Adds a new Evaluations to Solution value to the accumulator
        Args:
            es: Number of evaluations to solution over a RUN

        """
        self.aes_accumulator.append(es)

    def add_time(self, time_interval: float) -> None:
        """
        Adds a new Time lapsed value to the accumulator
        Args:
            time_interval: Time lapsed of a RUN

        """
        self.time_accumulator.append(time_interval)

    def add_most_fitted(self, individual: any) -> None:
        """
        Adds a new individual to the accumulator
        Args:
            individual: Individual with best fitness found over a RUN

        Returns:

        """
        self.most_fitted_accumulator.append(individual)

    def sum_aes(self, es: int) -> None:
        """
        Sums a new Evaluations to Solution value to the counter
        Args:
            es: Number of evaluations to Solution of a given Run

        Returns:

        """
        self.aes_counter = self.aes_counter + es

    #
    # Metrics
    #
    def reset(self):
        """ Resets variables that depend on the run """
        self.solution_found = False
        self.aes_counter = 0

    def calculate_metrics(self):
        """ Calculates the common GE evaluation metrics """
        # The running evaluations counter is recorded first so it takes part in the AES mean
        self.add_aes(self.aes_counter)

        self.success_rate = Stats.avg(self.success_rate_accumulator)
        self.mbf = Stats.avg(self.mbf_accumulator)
        self.aes = Stats.avg(self.aes_accumulator)
        self.mean_time = Stats.avg(self.time_accumulator)

    #
    # Auxiliary methods
    #
    def get_most_fitted(self):
        """
        Best individual found
        Returns: Individual with Best Fitness found for this Execution, None if nothing was accumulated yet

        """
        if len(self.most_fitted_accumulator) == 0:
            return None

        return max(self.most_fitted_accumulator, key=lambda individual: individual.fitness_value)

    @staticmethod
    def avg(al: list) -> float:
        """
        Returns the mean of a list if the list is not empty
        Args:
            al: List instance

        Returns: float, the mean/average of the list, 0.0 for an empty list

        """
        if len(al) == 0:
            return 0.0

        return sum(al) / len(al)

    def persist(self) -> None:
        """
        Makes or append execution result to file. If no valid format is specified CSV will be used as default
        Returns: None

        """
        as_json = self.config.report_format == ReportFormat.JSON

        with open(self.config.report_path, mode='a+') as f:
            record = f'{dict(self)}' if as_json else self._to_csv()
            f.write(record + '\n')

    def _to_csv(self):
        """
        Generates Comma Separated Value (csv) representation of a Stats instance object. Fields are the current
        timestamp followed by every metric, each one terminated by a tab character
        Returns: String, csv instance representation

        """
        fields = [f'{time()}']

        for value in self.__dict__.values():
            if type(value) is dict:
                # Nested dictionaries are flattened, only their values are written
                fields.extend(str(inner_value) for inner_value in value.values())
            else:
                fields.append(str(value))

        return ''.join(field + '\t' for field in fields)
#
# Dynamic Grammar (Backus Naur Form) Generator
#
def dynamic_generator(samples: [Doc]) -> dict:
    """
    Dynamically generates a grammar in Backus Naur Form (BNF) notation representing the available Spacy NLP
    Linguistic Feature values of the given sample list of Doc instances
    Args:
        samples: List of Spacy Doc objects

    Returns: Backus Naur Form grammar notation encoded in a dictionary

    """
    config = Config()

    LOG.info(f'Generating BNF based on the following samples: {str(samples)}')

    # BNF root
    pattern_grammar = {S: [P]}

    # Watch out features of seen samples and max number of tokens per sample
    max_length_token, min_length_token, features_dict, extended_features = _features_seen(samples)

    # Update times token per pattern [Min length of tokens, Max length of tokens] interval
    pattern_grammar[P] = _symbol_stacker(T, max_length_token, min_length_token)

    # Update times features per token (Max length of features)
    pattern_grammar[T] = _symbol_stacker(F, _get_features_per_token(features_dict))

    if config.use_token_wildcard is True:
        pattern_grammar[T].append(TOKEN_WILDCARD)

    # Update available features (just the features list)
    list_of_features = list(features_dict.keys())
    if config.use_grammar_operators is True and config.use_extended_pattern_syntax is False:
        pattern_grammar = _add_grammar_operators(pattern_grammar, list_of_features)
    elif config.use_extended_pattern_syntax is True and config.use_grammar_operators is False:
        pattern_grammar = _add_extended_pattern_syntax(pattern_grammar, list_of_features, features_dict)
    else:
        pattern_grammar[F] = list_of_features

    # Update each feature possible values. The value lists are extended in place, which is only safe because
    # _features_seen hands out an independent list per feature
    for k, v in features_dict.items():
        if config.use_extended_pattern_syntax is True:
            v.append(XPS)
        pattern_grammar.update({k: v})

    if config.use_custom_attributes is True:
        pattern_grammar = _add_custom_attributes(pattern_grammar, extended_features)

    LOG.info(f'Dynamically generated BNF: {str(pattern_grammar)}')

    return pattern_grammar


#
# BNF Utilities
#
def _features_seen(samples: [Doc]) -> (int, int, dict, dict):
    """
    Builds up a dictionary containing Spacy Linguistic Feature Keys and their respective seen values for the sample
    Args:
        samples: List of Spacy Doc objects

    Returns: Integer, the max length of a doc within the sample, integer, the min length of a doc within the
        sample, a dict of features and a dict of extended (custom attribute) features

    """
    config = Config()

    # (grammar key, token value reader) pairs. Their order fixes the key order of the returned features dict.
    # ORTH, TEXT, LOWER, LENGTH and SHAPE only need a tokenizer, the others require more than a tokenizer
    extractors = (
        (ORTH, lambda token: token.orth_),
        (TEXT, lambda token: token.text),
        (LOWER, lambda token: token.lower_),
        (LENGTH, len),
        (POS, lambda token: token.pos_),
        (TAG, lambda token: token.tag_),
        (DEP, lambda token: token.dep_),
        (LEMMA, lambda token: token.lemma_),
        (SHAPE, lambda token: token.shape_),
        (ENT_TYPE, lambda token: token.ent_type_),
    )

    # Set token extensions
    if config.use_custom_attributes is True:
        _set_token_extension_attributes(samples[0][0])
        extended_features = _extended_features_seen([token for sample in samples for token in sample])
    else:
        extended_features = {UNDERSCORE: {}}

    observed = {key: [] for key, _ in extractors}

    # Capture the len of the largest and the shortest doc
    max_doc_length = 0
    min_doc_length = 999999999

    for sample in samples:
        for token in sample:
            for key, extract in extractors:
                observed[key].append(extract(token))

        sample_length = len(sample)
        max_doc_length = max(max_doc_length, sample_length)
        min_doc_length = min(min_doc_length, sample_length)

    if config.use_uniques is True:
        features = {key: sorted(set(values)) for key, values in observed.items()}
    else:
        features = observed

    # Add boolean features. Every key gets its own list: they used to share a single list object, so appending
    # to the values of one boolean feature (as the grammar generator does) altered all of them at once
    if config.use_boolean_features is True:
        boolean_keys = (IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_UPPER, IS_TITLE, IS_PUNCT, IS_SPACE, IS_STOP,
                        LIKE_NUM, LIKE_URL, LIKE_EMAIL)
        features.update({key: [True, False] for key in boolean_keys})

    # Drop all observations equal to empty string
    features = _feature_pruner(features)
    extended_features[UNDERSCORE] = _feature_pruner(extended_features[UNDERSCORE])

    return max_doc_length, min_doc_length, features, extended_features


def _set_token_extension_attributes(token: Token) -> None:
    """
    Given a Spacy Token instance, register all the Spacy token attributes not accepted by the Spacy Matcher
    as custom attributes inside the Token Extensions (token._. space)
    Args:
        token: Spacy Token instance whose str, bool and float members are inspected

    Returns: None

    """
    # Retrieve cleaned up Token Attributes
    token_attributes = _clean_token_attributes(
        {k: v for k, v in getmembers(token) if type(v) in (str, bool, float)})

    # Set token custom attributes. The attribute name is bound as a default argument so each getter keeps its own
    # name instead of the last one of the loop
    # NOTE(review): set_extension is called without force, presumably it fails when the extension is already
    # registered: confirm this is only run once per process
    for name in token_attributes:
        token.set_extension(str('custom_' + name).upper(),
                            getter=lambda token_=token, k_=name: getattr(token_, k_))


def _clean_token_attributes(token_attributes: dict) -> dict:
    """
    Removes from input dict keys contained in a set that represents the Spacy Matcher supported token attributes.
    The input dict is modified in place; keys that are not present are ignored
    Args:
        token_attributes: dict of token features

    Returns: Token attributes dict without Spacy Matcher's supported attribute keys

    """
    # pop() with a default: a missing attribute must not abort the whole grammar generation with a KeyError
    token_attributes.pop('__doc__', None)
    for item in MATCHER_SUPPORTED_ATTRIBUTES:
        token_attributes.pop(item, None)

    return token_attributes


def _extended_features_seen(tokens: [Token]) -> dict:
    """
    Builds up a dictionary containing Spacy Linguistic Feature Keys and their respective seen values for the
    input token list extended attributes (those attributes not accepted by the Spacy Matcher by default,
    included as token extensions)
    Args:
        tokens: List of Spacy Token instances

    Returns: dict of features

    """
    def seen(extension_name: str) -> list:
        """ Sorted unique values of a token extension over all the input tokens """
        return sorted({getattr(getattr(token, '_'), extension_name) for token in tokens})

    # IS_SENT_START and PROB are left out on purpose, as they were disabled in this table
    extended_features = \
        {
            UNDERSCORE: {
                ENT_ID: seen('CUSTOM_ENT_ID_'),
                ENT_IOB: seen('CUSTOM_ENT_IOB_'),
                ENT_KB_ID: seen('CUSTOM_ENT_KB_ID_'),
                HAS_VECTOR: [True, False],
                IS_BRACKET: [True, False],
                IS_CURRENCY: [True, False],
                IS_LEFT_PUNCT: [True, False],
                IS_OOV: [True, False],
                IS_QUOTE: [True, False],
                IS_RIGHT_PUNCT: [True, False],
                LANG: seen('CUSTOM_LANG_'),
                NORM: seen('CUSTOM_NORM_'),
                PREFIX: seen('CUSTOM_PREFIX_'),
                SENTIMENT: seen('CUSTOM_SENTIMENT'),
                STRING: seen('CUSTOM_STRING'),
                SUFFIX: seen('CUSTOM_SUFFIX_'),
                TEXT_WITH_WS: seen('CUSTOM_TEXT_WITH_WS'),
                WHITESPACE: seen('CUSTOM_WHITESPACE_')
            }
        }

    return extended_features


def _feature_pruner(features: dict) -> dict:
    """
    Prunes dict keys whose only observed value is the empty string. The input dict is modified in place
    Args:
        features: dict of feature keys with their list of values

    Returns: pruned dict

    """
    # Collect first, then delete: a dict can not change size while it is being iterated
    empty_keys = [key for key, values in features.items() if len(values) == 1 and values[0] == '']

    for key in empty_keys:
        features.pop(key)

    return features


def _symbol_stacker(symbol: str, max_length: int, min_length: int = 1) -> list:
    """
    Given a symbol creates a list of length max_length where each item is symbol concat previous list item
    Args:
        symbol: string
        max_length: list max length
        min_length: minimum number of symbols of the first item kept, only honoured when
            1 < min_length <= max_length

    Returns: list of symbol

    """
    symbol_times_list = [','.join([symbol] * times) for times in range(1, max_length + 1)]

    if 1 < min_length <= max_length:
        symbol_times_list = symbol_times_list[min_length - 1:]

    return symbol_times_list


def _get_features_per_token(features_dict: dict) -> int:
    """
    Given the configuration set up, determine the maximum number of features per token at grammar
    Args:
        features_dict: dictionary of features keys with all possible feature value options

    Returns: integer, the number of available features when the configured amount is zero, negative or bigger
        than what is available, the configured amount otherwise

    """
    config = Config()
    available = len(features_dict.keys())

    if config.features_per_token <= 0:
        return available

    return min(available, config.features_per_token)


def _add_grammar_operators(pattern_grammar: dict, list_of_features: list) -> dict:
    """
    Adds support to Spacy's grammar operators usage
    Args:
        pattern_grammar: BNF dict
        list_of_features: list of token features

    Returns: Backus Naur Form grammar notation encoded in a dictionary with Spacy's grammar operators

    """
    # Every feature is offered twice: on its own and followed by an operator
    list_of_features_op = list()
    for feature in list_of_features:
        list_of_features_op.extend((feature, feature + ',' + OP))

    pattern_grammar[F] = list_of_features_op
    pattern_grammar[OP] = [NEGATION, ZERO_OR_ONE, ONE_OR_MORE, ZERO_OR_MORE]
    return pattern_grammar


def _add_extended_pattern_syntax(pattern_grammar: dict, list_of_features: list, features_dict: dict) -> dict:
    """
    Adds support to the extended pattern syntax at BNF dicts
    Args:
        pattern_grammar: BNF dict
        list_of_features: list of token features
        features_dict: dict of token features

    Returns:
        dict: Backus Naur Form grammar notation encoded in a dictionary with Spacy's extended pattern syntax
    """
    lengths = features_dict[LENGTH]
    full_terminal_stack = _all_feature_terminal_list(features_dict)

    pattern_grammar[F] = list_of_features
    pattern_grammar[XPS] = [IN, NOT_IN, EQQ, GEQ, LEQ, GTH, LTH]

    # Independent copies per operator, so a later in place change of one rule can not leak into the others
    for set_operator in (IN, NOT_IN):
        pattern_grammar[set_operator] = list(full_terminal_stack)

    for comparison_operator in (EQQ, GEQ, LEQ, GTH, LTH):
        pattern_grammar[comparison_operator] = list(lengths)

    return pattern_grammar


def _all_feature_terminal_list(features_dict: dict) -> list:
    """
    Stacks all feature terminal options in a list of lists to be used for the extended pattern syntax set operators.
    For every feature, each leading run of its values ([v1], [v1, v2], ...) is produced, repeated runs are
    dropped keeping the first occurrence
    Args:
        features_dict: dictionary of feature keys with all possible feature value options

    Returns: list of lists of terminals

    """
    all_terminal_list = list()

    for terminals in features_dict.values():
        current_run = list()

        for terminal in terminals:
            current_run = current_run + [terminal]

            # Compared by equality (terminals are not required to be hashable), against the kept runs only
            if current_run not in all_terminal_list:
                all_terminal_list.append(current_run)

    return all_terminal_list
a specific set of custom attributes at BNF dict 426 | Args: 427 | pattern_grammar: BNF dict 428 | extended_features: dict of token features not supported by default by the Spacy's Matcher 429 | 430 | Returns: Backus Naur Form grammar notation encoded in a dictionary with Spacy's custom attributes 431 | 432 | """ 433 | pattern_grammar[UNDERSCORE] = _symbol_stacker(EF, _get_features_per_token(extended_features[UNDERSCORE])) 434 | pattern_grammar[EF] = list(extended_features[UNDERSCORE].keys()) 435 | pattern_grammar.update(extended_features[UNDERSCORE].items()) 436 | pattern_grammar[T].append(UNDERSCORE) 437 | pattern_grammar[T].append(F + "," + UNDERSCORE) 438 | return pattern_grammar 439 | -------------------------------------------------------------------------------- /PatternOmatic/settings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revuel/PatternOmatic/8f95c1c134a14419a11b8cb192144857b40d0b3c/PatternOmatic/settings/__init__.py -------------------------------------------------------------------------------- /PatternOmatic/settings/config.py: -------------------------------------------------------------------------------- 1 | """ Configuration Management module 2 | 3 | This file is part of PatternOmatic. 4 | 5 | Copyright © 2020 Miguel Revuelta Espinosa 6 | 7 | PatternOmatic is free software: you can redistribute it and/or 8 | modify it under the terms of the GNU Lesser General Public License 9 | as published by the Free Software Foundation, either version 3 of 10 | the License, or (at your option) any later version. 11 | 12 | PatternOmatic is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU Lesser General Public License for more details. 
class SingletonMetaNaive(type):
    """ The Naive Singleton Design Pattern of type Metaclass builder

    A class built with this metaclass is constructed on its first call only; every later call returns
    that same object and ignores the arguments it receives.
    """

    # Default seen by classes that have not been instantiated yet (or have been cleared)
    _instance: Optional[Config] = None

    def __call__(cls, config_file_path: Optional[str] = None) -> Config:
        """
        Builds the instance on first use, returns the stored one afterwards
        Args:
            config_file_path: Path for a configuration file, only used when the instance is created

        Returns: the single instance of the class

        """
        if cls._instance is None:
            LOG.debug('Creating config object!')
            cls._instance = super().__call__(config_file_path)
        return cls._instance

    def clear_instance(cls):
        """ For testing purposes, destroy Singleton instance """
        LOG.debug('Removing config object!')
        # Resetting the reference is enough: the next call finds None and builds a fresh instance
        cls._instance = None
def __init__(self, config_file_path: Optional[str] = None):
    """
    Config object constructor
    Args:
        config_file_path: Path for a configuration file. When it is None, or no file could be read from
            that path, every parameter takes its default value

    Options missing from the file, or holding a value that can not be converted to the type of their
    default, fall back to that default too (see _validate_config_argument).
    """
    config_parser = configparser.ConfigParser()

    if config_file_path is None:
        LOG.warning('Configuration file not provided. Falling back to default values')
        self.file_path = None
    elif not config_parser.read(config_file_path):
        # ConfigParser.read returns the list of files it parsed; empty means nothing was loaded
        LOG.warning(f'File {config_file_path} not found. Falling back to default values')
        self.file_path = None
    else:
        self.file_path = config_file_path

    #
    # GE configuration parameters
    #
    self.max_runs = self._validate_config_argument(GE, MAX_RUNS, 4, config_parser)
    self.success_threshold = self._validate_config_argument(GE, SUCCESS_THRESHOLD, 0.8, config_parser)
    self.population_size = self._validate_config_argument(GE, POPULATION_SIZE, 10, config_parser)
    self.max_generations = self._validate_config_argument(GE, MAX_GENERATIONS, 3, config_parser)
    self.codon_length = self._validate_config_argument(GE, CODON_LENGTH, 8, config_parser)
    self.num_codons_per_individual = self._validate_config_argument(GE, CODONS_X_INDIVIDUAL, 4, config_parser)
    # Derived value, not read from the file
    self.dna_length = self.codon_length * self.num_codons_per_individual
    self.mutation_probability = self._validate_config_argument(GE, MUTATION_PROBABILITY, 0.5, config_parser)
    self.offspring_max_size_factor = self._validate_config_argument(GE, OFFSPRING_FACTOR, 3.5, config_parser)
    self.mating_probability = self._validate_config_argument(GE, MATING_PROBABILITY, 0.9, config_parser)
    self.k_value = self._validate_config_argument(GE, K_VALUE, 3, config_parser)

    #
    # GE configuration methods (integer options turned into their enum member)
    #
    self.selection_type = SelectionType(
        self._validate_config_argument(GE, SELECTION_TYPE, 0, config_parser))

    self.recombination_type = RecombinationType(
        self._validate_config_argument(GE, RECOMBINATION_TYPE, 0, config_parser))

    self.replacement_type = ReplacementType(
        self._validate_config_argument(GE, REPLACEMENT_TYPE, 0, config_parser))

    self.fitness_function_type = FitnessType(
        self._validate_config_argument(GE, FITNESS_FUNCTION_TYPE, 1, config_parser))

    #
    # BNF Grammar Generation configuration options
    #
    self.features_per_token = self._validate_config_argument(DGG, FEATURES_X_TOKEN, 1, config_parser)
    self.use_boolean_features = self._validate_config_argument(DGG, USE_BOOLEAN_FEATURES, False, config_parser)
    self.use_custom_attributes = self._validate_config_argument(DGG, USE_CUSTOM_ATTRIBUTES, False, config_parser)
    self.use_uniques = self._validate_config_argument(DGG, USE_UNIQUES, True, config_parser)
    self.use_grammar_operators = self._validate_config_argument(DGG, USE_GRAMMAR_OPERATORS, False, config_parser)
    self.use_token_wildcard = self._validate_config_argument(DGG, USE_TOKEN_WILDCARD, False, config_parser)
    self.use_extended_pattern_syntax = \
        self._validate_config_argument(DGG, USE_EXTENDED_PATTERN_SYNTAX, False, config_parser)

    #
    # Configuration validation
    #
    self._check_xps_op_restriction()

    #
    # IO
    #
    self.report_path = \
        self._validate_config_argument(IO, REPORT_PATH, '/tmp/patternomatic_report.txt', config_parser)

    self.report_format = ReportFormat(self._validate_config_argument(IO, REPORT_FORMAT, 0, config_parser))

    LOG.info(f'Configuration instance: {self}')
@staticmethod
def _validate_config_argument(section, option, default, config_parser):
    """
    Reads one option from the parsed configuration, converted to the type of its default value
    Args:
        section: name of the configuration section holding the option
        option: name of the option inside that section
        default: value used when the option is absent or can not be converted. Its type (bool, int,
            float or str) selects which reader of config_parser is used
        config_parser: parser offering getboolean, getint, getfloat and get

    Returns: the value read from config_parser, or default when reading raises ValueError or when
        default is of any other type

    """
    # bool is tested first: it is a subclass of int and would otherwise be read with getint
    if isinstance(default, bool):
        reader = config_parser.getboolean
    elif isinstance(default, int):
        reader = config_parser.getint
    elif isinstance(default, float):
        reader = config_parser.getfloat
    elif isinstance(default, str):
        reader = config_parser.get
    else:
        reader = None

    value = default

    if reader is not None:
        try:
            value = reader(section, option, fallback=default)
        except ValueError:
            LOG.warning(f'[{section}][{option}] configuration parameter wrongly set. '
                        f'Falling back to its default value: {default}')

    LOG.debug(f'[{section}][{option}] {value}')
    return value
@unique
class SelectionType(Enum):
    """ Evolutionary selection types, identified by the integer used for them in the configuration """
    BINARY_TOURNAMENT = 0
    K_TOURNAMENT = 1

    def __repr__(self) -> str:
        """ Human readable: only the member name is shown """
        member_name = self.name
        return member_name
SRD 93 | ENT_TYPE = SLD + 'ENT_TYPE' + SRD 94 | # Feature symbols (base boolean) 95 | IS_ALPHA = SLD + 'IS_ALPHA' + SRD 96 | IS_ASCII = SLD + 'IS_ASCII' + SRD 97 | IS_DIGIT = SLD + 'IS_DIGIT' + SRD 98 | IS_LOWER = SLD + 'IS_LOWER' + SRD 99 | IS_UPPER = SLD + 'IS_UPPER' + SRD 100 | IS_TITLE = SLD + 'IS_TITLE' + SRD 101 | IS_PUNCT = SLD + 'IS_PUNCT' + SRD 102 | IS_SPACE = SLD + 'IS_SPACE' + SRD 103 | IS_STOP = SLD + 'IS_STOP' + SRD 104 | LIKE_NUM = SLD + 'LIKE_NUM' + SRD 105 | LIKE_URL = SLD + 'LIKE_URL' + SRD 106 | LIKE_EMAIL = SLD + 'LIKE_EMAIL' + SRD 107 | # Grammar operator and quantifier symbols 108 | OP = SLD + 'OP' + SRD 109 | NEGATION = '!' 110 | ZERO_OR_ONE = '?' 111 | ONE_OR_MORE = '+' 112 | ZERO_OR_MORE = '*' 113 | # Token wildcard 114 | TOKEN_WILDCARD = '{}' 115 | # Grammar extended pattern syntax 116 | XPS = SLD + 'XPS' + SRD 117 | IN = SLD + 'IN' + SRD 118 | NOT_IN = SLD + 'NOT_IN' + SRD 119 | EQQ = SLD + 'EQQ' + SRD 120 | GEQ = SLD + 'GEQ' + SRD 121 | LEQ = SLD + 'LEQ' + SRD 122 | GTH = SLD + 'GTH' + SRD 123 | LTH = SLD + 'LTH' + SRD 124 | XPS_AS = {EQQ: "==", GEQ: ">=", LEQ: "<=", GTH: ">", LTH: "<"} 125 | # Grammar custom attributes extension symbol 126 | UNDERSCORE = SLD + 'UNDERSCORE' + SRD 127 | EF = SLD + 'EF' + SRD 128 | ENT_ID = SLD + 'CUSTOM_ENT_ID_' + SRD 129 | ENT_IOB = SLD + 'CUSTOM_ENT_IOB_' + SRD 130 | ENT_KB_ID = SLD + 'CUSTOM_ENT_KB_ID_' + SRD 131 | HAS_VECTOR = SLD + 'CUSTOM_HAS_VECTOR' + SRD 132 | IS_BRACKET = SLD + 'CUSTOM_IS_BRACKET' + SRD 133 | IS_CURRENCY = SLD + 'CUSTOM_IS_CURRENCY' + SRD 134 | IS_LEFT_PUNCT = SLD + 'CUSTOM_IS_LEFT_PUNCT' + SRD 135 | IS_OOV = SLD + 'CUSTOM_IS_OOV' + SRD 136 | IS_QUOTE = SLD + 'CUSTOM_IS_QUOTE' + SRD 137 | IS_RIGHT_PUNCT = SLD + 'CUSTOM_IS_RIGHT_PUNCT' + SRD 138 | IS_SENT_START = SLD + 'CUSTOM_IS_SENT_START' + SRD 139 | LANG = SLD + 'CUSTOM_LANG_' + SRD 140 | NORM = SLD + 'CUSTOM_NORM_' + SRD 141 | PREFIX = SLD + 'CUSTOM_PREFIX_' + SRD 142 | PROB = SLD + 'CUSTOM_PROB' + SRD 143 | SENT_START = SLD 
+ 'CUSTOM_SENT_START' + SRD 144 | SENTIMENT = SLD + 'CUSTOM_SENTIMENT' + SRD 145 | STRING = SLD + 'CUSTOM_STRING' + SRD 146 | SUFFIX = SLD + 'CUSTOM_SUFFIX_' + SRD 147 | TEXT_WITH_WS = SLD + 'CUSTOM_TEXT_WITH_WS' + SRD 148 | WHITESPACE = SLD + 'CUSTOM_WHITESPACE_' + SRD 149 | # Matcher's util 150 | MATCHER_SUPPORTED_ATTRIBUTES = ( 151 | 'orth_', 152 | 'text', 153 | 'lower_', 154 | 'pos_', 155 | 'tag_', 156 | 'dep_', 157 | 'lemma_', 158 | 'shape_', 159 | 'ent_type_', 160 | 'is_alpha', 161 | 'is_ascii', 162 | 'is_digit', 163 | 'is_lower', 164 | 'is_upper', 165 | 'is_title', 166 | 'is_punct', 167 | 'is_space', 168 | 'is_stop', 169 | 'like_num', 170 | 'like_url', 171 | 'like_email') 172 | 173 | # 174 | # Config ini literals 175 | # 176 | GE = 'GE' 177 | MAX_RUNS = 'MAX_RUNS' 178 | SUCCESS_THRESHOLD = 'SUCCESS_THRESHOLD' 179 | POPULATION_SIZE = 'POPULATION_SIZE' 180 | MAX_GENERATIONS = 'MAX_GENERATIONS' 181 | CODON_LENGTH = 'CODON_LENGTH' 182 | CODONS_X_INDIVIDUAL = 'CODONS_X_INDIVIDUAL' 183 | MUTATION_PROBABILITY = 'MUTATION_PROBABILITY' 184 | OFFSPRING_FACTOR = 'OFFSPRING_FACTOR' 185 | MATING_PROBABILITY = 'MATING_PROBABILITY' 186 | K_VALUE = 'K_VALUE' 187 | SELECTION_TYPE = 'SELECTION_TYPE' 188 | RECOMBINATION_TYPE = 'RECOMBINATION_TYPE' 189 | REPLACEMENT_TYPE = 'REPLACEMENT_TYPE' 190 | FITNESS_FUNCTION_TYPE = 'FITNESS_FUNCTION_TYPE' 191 | DGG = 'DGG' 192 | FEATURES_X_TOKEN = 'FEATURES_X_TOKEN' 193 | USE_BOOLEAN_FEATURES = 'USE_BOOLEAN_FEATURES' 194 | USE_UNIQUES = 'USE_UNIQUES' 195 | USE_GRAMMAR_OPERATORS = 'USE_GRAMMAR_OPERATORS' 196 | USE_TOKEN_WILDCARD = 'USE_TOKEN_WILDCARD' 197 | USE_EXTENDED_PATTERN_SYNTAX = 'USE_EXTENDED_PATTERN_SYNTAX' 198 | USE_CUSTOM_ATTRIBUTES = 'USE_CUSTOM_ATTRIBUTES' 199 | IO = 'IO' 200 | REPORT_PATH = 'REPORT_PATH' 201 | REPORT_FORMAT = 'REPORT_FORMAT' 202 | 203 | 204 | @unique 205 | class ReportFormat(Enum): 206 | """ Report format type """ 207 | JSON = 0 208 | CSV = 1 209 | 210 | def __repr__(self): 211 | """ Human readable """ 212 | 
def get_logger(logger_name):
    """
    Returns a set up logger
    Args:
        logger_name: Name of the logger

    Returns: logger at INFO level that does not propagate to its ancestors. The console and file
        handlers are attached the first time a name is requested; if the logger already owns
        handlers they are left untouched

    """
    logger = logging.getLogger(logger_name)
    logger.setLevel(logging.INFO)

    # logging.getLogger hands back the same object for a given name, so attaching the handlers on
    # every call would emit each record once per call made
    if not logger.handlers:
        logger.addHandler(_get_console_handler())
        logger.addHandler(_get_file_handler())

    logger.propagate = False
    return logger
spaCy's linguistic patterns matching a given set of string samples_ 19 | 20 | ## Requirements 21 | - [Python 3.7.3](https://www.python.org/downloads/release/python-373/) 22 | - [Spacy 2.3.*](https://spacy.io/usage/v2-3) 23 | 24 | ## Basic usage 25 | 26 | ### From sources 27 | *[Clone SCM official repository](https://github.com/revuel/PatternOmatic)* 28 | 29 | `git clone git@github.com:revuel/PatternOmatic.git` 30 | 31 | *Play with Makefile* 32 | 33 | - `make venv` to activate project's [Virtual Environment*](https://docs.python.org/3.7/library/venv.html) 34 | - `make libs` to install dependencies 35 | - `make test` to run Unit Tests 36 | - `make coverage` to run Code Coverage 37 | - `make run` to run PatternOmatic's script with example parameters 38 | 39 | * you must have one first 40 | 41 | ### From package 42 | *Install package* 43 | 44 | `pip install PatternOmatic` 45 | 46 | *Play with the CLI* 47 | 48 | ``` 49 | # Show help 50 | patternomatic.py -h 51 | 52 | # Usage example 1: Basic 53 | patternomatic.py -s Hello world -s Goodbye world 54 | 55 | # Usage example 2: Using a different language 56 | python -m spacy download es_core_news_sm 57 | patternomatic.py -s Me llamo Miguel -s Se llama PatternOmatic -l es_core_news_sm 58 | ``` 59 | 60 | *Play with the library* 61 | ``` 62 | """ 63 | PatternOmatic library client example. 64 | Find linguistic patterns to be used by the spaCy Rule Based Matcher 65 | 66 | """ 67 | from PatternOmatic.api import find_patterns, Config 68 | 69 | if __name__ == '__main__': 70 | 71 | my_samples = ['I am a cat!', 'You are a dog!', 'She is an owl!'] 72 | 73 | # Optionally, let it evolve a little bit more! 74 | config = Config() 75 | config.max_generations = 150 76 | config.max_runs = 3 77 | 78 | patterns_found, _ = find_patterns(my_samples) 79 | 80 | print(f'Patterns found: {patterns_found}') 81 | 82 | ``` 83 | --- 84 | 85 | ## Features 86 | 87 | ### Generic 88 | 89 | ✅ No OS dependencies, no storage or database required! 
90 | 91 | ✅ Lightweight package with just a little direct pip dependencies 92 | - [spaCy](https://pypi.org/project/spacy/2.3.2/) 93 | - [spaCy's en_core_web_sm Language Model](https://github.com/explosion/spacy-models/releases/tag/en_core_web_sm-2.3.0) 94 | 95 | ✅ Easy and highly configurable to boost clever searches 96 | 97 | ✅ Includes basic logging mechanism 98 | 99 | ✅ Includes basic reporting, JSON and CSV format supported. Report file path is configurable 100 | 101 | ✅ Configuration file example provided (config.ini) 102 | 103 | ✅ Default configuration is run if no configuration file provided 104 | 105 | ✅ Provides rollback actions against several possible misconfiguration scenarios 106 | 107 | ### Evolutionary 108 | 109 | ✅ Basic Evolutionary (Grammatical Evolution) parameters available and configurable 110 | 111 | ✅ Supports two different Evolutionary Fitness functions 112 | 113 | ✅ Supports Binary Tournament Evolutionary Selection Type 114 | 115 | ✅ Supports Random One Point Crossover Evolutionary Recombination Type 116 | 117 | ✅ Supports "µ + λ" Evolutionary Replacement Type 118 | 119 | ✅ Supports "µ ∪ λ" with elitism Evolutionary Replacement Type 120 | 121 | ✅ Supports "µ ∪ λ" without elitism Evolutionary Replacement Type 122 | 123 | ✅ Typical evolutionary performance metrics included: 124 | - Success Rate (SR) 125 | - Mean Best Fitness (MBF) 126 | - Average Evaluations to Solution (AES) 127 | 128 | ### Linguistic 129 | 130 | ✅ [Compatible with any spaCy Language Model](https://spacy.io/usage/models#languages) 131 | 132 | ✅ [Supports all spaCy's Rule Based Matcher standard Token attributes](https://spacy.io/usage/rule-based-matching#adding-patterns-attributes) 133 | 134 | ✅ [Supports the following spaCy's Rule Based Matcher non standard Token attributes](https://spacy.io/api/token#attributes) [(via underscore)](https://spacy.io/usage/processing-pipelines#custom-components-attributes) 135 | - ent_id 136 | - ent_iob 137 | - ent_kb_id 138 | - has_vector 139 
| - is_bracket 140 | - is_currency 141 | - is_left_punct 142 | - is_oov 143 | - is_quote 144 | - is_right_punct 145 | - lang 146 | - norm 147 | - prefix 148 | - sentiment 149 | - string 150 | - suffix 151 | - text_with_ws 152 | - whitespace 153 | 154 | ✅ Supports skipping boolean Token attributes 155 | 156 | ✅ [Supports spaCy's Rule Based Matcher Extended Pattern Syntax](https://spacy.io/usage/rule-based-matching#adding-patterns-attributes-extended) 157 | 158 | ✅ [Supports spaCy's Rule Based Matcher Grammar Operators and Quantifiers](https://spacy.io/usage/rule-based-matching#quantifiers) 159 | 160 | ✅ [Supports Token Wildcard](https://spacy.io/usage/rule-based-matching#adding-patterns-wildcard) 161 | 162 | ✅ Supports defining the number of attributes per token within searched patterns 163 | 164 | ✅ Supports usage of non repeated token attribute values 165 | 166 | --- 167 | 168 | Author: [Miguel Revuelta Espinosa _(revuel)_](mailto:revuel22@hotmail.com "Contact author"), a humble AI enthusiastic 169 | -------------------------------------------------------------------------------- /config.ini: -------------------------------------------------------------------------------- 1 | # - Configuration File example for "PatternOmatic" 2 | # 3 | # Grammatical Evolution (GE) parameters 4 | # 5 | [GE] 6 | # Number of runs per execution. This is the amount of new populations to spawn per execution. Integer greater than 0 7 | # Integer within interval [3, *) 8 | MAX_RUNS = 4 9 | 10 | # Minimum fitness value found in an execution to consider this a successful execution. 11 | # Float within interval [0.0, 1.0] 12 | SUCCESS_THRESHOLD = 0.8 13 | 14 | # Number of individuals per population 15 | # Integer within interval [4, *) 16 | POPULATION_SIZE = 100 17 | 18 | # Maximum number of generations per population in a run. 
19 | # Integer within interval [1, *) 20 | MAX_GENERATIONS = 20 21 | 22 | # Number of genes per codon 23 | # Integer within interval [1, 16] 24 | CODON_LENGTH = 8 25 | 26 | # Number of codons per individual dna 27 | # Integer within the set (4, 8, 16) 28 | CODONS_X_INDIVIDUAL = 4 29 | 30 | # Mutation probability. Chance of mutating a gene within an individual's birth, for every gene 31 | # Float within interval [0.0, 1.0] 32 | MUTATION_PROBABILITY = 0.5 33 | 34 | # Growth factor while generating offspring. 35 | # This factor appears in the literature. Do not edit this value. 36 | OFFSPRING_FACTOR = 3.5 37 | 38 | # Chances to produce offspring per individuals selection 39 | # Float within interval [0.0, 1.0] 40 | MATING_PROBABILITY = 0.9 41 | 42 | # Number of individuals to compete when K_TOURNAMENT is the selection mode 43 | # Integer within interval [3, *) 44 | K_VALUE = 3 45 | 46 | # Selection type: 47 | # 0 = BINARY_TOURNAMENT 48 | # 1 = K_TOURNAMENT 49 | SELECTION_TYPE = 0 50 | 51 | # Recombination type: 52 | # 0 = RANDOM_ONE_POINT_CROSSOVER 53 | RECOMBINATION_TYPE = 0 54 | 55 | # Replacement type: 56 | # 0 = MU_PLUS_LAMBDA 57 | # 1 = MU_LAMBDA_WITH_ELITISM 58 | # 2 = MU_LAMBDA_WITHOUT_ELITISM 59 | REPLACEMENT_TYPE = 0 60 | 61 | # Fitness function type: 62 | # 0 = BASIC 63 | # 1 = FULL_MATCH 64 | FITNESS_FUNCTION_TYPE = 1 65 | 66 | # 67 | # Dynamic Grammar Generation (DGG) parameters 68 | # 69 | [DGG] 70 | # Features per token: 71 | # 0 or < 0 = unlimited 72 | # 1 or more until the maximum number of features = that number of features per token 73 | # > maximum number of features per token = the maximum number of features per token 74 | # 1 is the recommended value here 75 | FEATURES_X_TOKEN = 1 76 | 77 | # Use uniques: 78 | # True = Do not repeat features per production rule 79 | # False = Features can be repeated per production rule 80 | USE_UNIQUES = True 81 | 82 | # Use boolean features: 83 | # True = Enable the usage of Spacy's boolean token features (not
recommended) 84 | # False = Disable the usage of Spacy's boolean token features (recommended) 85 | # These features show a highly positive correlation, which means they are not useful for finding patterns 86 | USE_BOOLEAN_FEATURES = False 87 | 88 | # Use Grammar Operators: 89 | # True = Enable patterns with Spacy's Grammar Operators 90 | # False = Disable patterns with Spacy's Grammar Operators 91 | # Grammar Operators and Extended Pattern Syntax can not be enabled together 92 | USE_GRAMMAR_OPERATORS = False 93 | 94 | # Use Token Wildcard: 95 | # True = Enable patterns with Token Wildcard 96 | # False = Disable patterns with Token Wildcard 97 | USE_TOKEN_WILDCARD = False 98 | 99 | # Use Extended Pattern Syntax: 100 | # True = Enable patterns with Spacy's Extended Pattern Syntax 101 | # False = Disable patterns with Spacy's Extended Pattern Syntax 102 | # Grammar Operators and Extended Pattern Syntax can not be enabled together 103 | USE_EXTENDED_PATTERN_SYNTAX = False 104 | 105 | # Use Custom Attributes: 106 | # True = Enable patterns with underscore, where all the token's attributes not accepted by the Matcher are included 107 | # False = Disable patterns with underscore, where all the token's attributes not accepted by the Matcher are included 108 | USE_CUSTOM_ATTRIBUTES = False 109 | 110 | # 111 | # Operating System (OS) configuration options 112 | # 113 | [IO] 114 | # Valid OS path and filename to persist execution report 115 | REPORT_PATH = /tmp/patternOmatic_report.txt 116 | 117 | # Report format 118 | # 0 = json format 119 | # 1 = csv format 120 | REPORT_FORMAT = 0 121 | -------------------------------------------------------------------------------- /patternomatic_logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 7 | 9 | 15 | 18 | 21 | 24 | 26 | 29 | 31 | 33 | 36 | 43 | 46 | 48 | 54 | 60 | 66 | 71 | 75 | 79 | 84 | 90 | 95 | 101 | 105 | 109 | 112 | 115 | 117 | 120 | 123 | 127 | 129 | 131 | 134 | 139 | 142 |
144 | 148 | 151 | 153 | 156 | 160 | 164 | 167 | 170 | 173 | 177 | 180 | 184 | 187 | 190 | 193 | 197 | 201 | 204 | 206 | 209 | 213 | 217 | 219 | 223 | 224 | 225 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | setuptools==40.8.0 2 | pip==20.2.3 3 | coverage==5.0.3 4 | wheel==0.33.6 5 | importlib-metadata==2.0.0 6 | twine==3.2.0 7 | spacy==2.3.* 8 | https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.0/en_core_web_sm-2.3.0.tar.gz#egg=en_core_web_sm 9 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revuel/PatternOmatic/8f95c1c134a14419a11b8cb192144857b40d0b3c/scripts/__init__.py -------------------------------------------------------------------------------- /scripts/patternomatic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """ Command Line Interface module 3 | 4 | This file is part of PatternOmatic. 5 | 6 | Copyright © 2020 Miguel Revuelta Espinosa 7 | 8 | PatternOmatic is free software: you can redistribute it and/or 9 | modify it under the terms of the GNU Lesser General Public License 10 | as published by the Free Software Foundation, either version 3 of 11 | the License, or (at your option) any later version. 12 | 13 | PatternOmatic is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU Lesser General Public License for more details. 17 | 18 | You should have received a copy of the GNU Lesser General Public License 19 | along with PatternOmatic. If not, see . 
20 | 21 | """ 22 | import sys 23 | from typing import List 24 | from argparse import ArgumentParser 25 | from PatternOmatic.api import find_patterns 26 | from PatternOmatic.settings.log import LOG 27 | 28 | 29 | def main(args: List) -> None: 30 | """ 31 | PatternOmatic's script main function wrapper 32 | Args: 33 | args: Command Line Input Arguments 34 | 35 | Returns: None 36 | 37 | """ 38 | LOG.info('Parsing command line arguments...') 39 | try: 40 | cli = ArgumentParser( 41 | description='Finds the Spacy\'s Matcher pattern for the given samples', 42 | epilog='...using actual Artificial Intelligence' 43 | ) 44 | 45 | # Samples 46 | cli.add_argument( 47 | '-s', 48 | '--sample', 49 | action='append', 50 | required=True, 51 | nargs='+', 52 | type=str, 53 | help='A sample phrase' 54 | ) 55 | 56 | # Spacy Language Model 57 | cli.add_argument( 58 | '-l', 59 | '--language', 60 | nargs='?', 61 | type=str, 62 | default='en_core_web_sm', 63 | help='Spacy language model to be used' 64 | ) 65 | 66 | # Configuration file to be used 67 | cli.add_argument( 68 | '-c', 69 | '--config', 70 | nargs='?', 71 | type=str, 72 | help='Configuration file path to be used', 73 | default=None, 74 | ) 75 | 76 | # Parse command line input arguments/options 77 | parsed_args = cli.parse_args(args) 78 | 79 | # Join sample arguments 80 | for index, item in enumerate(parsed_args.sample): 81 | parsed_args.sample[index] = ' '.join(item) 82 | 83 | # 84 | # Find patterns 85 | # 86 | patterns_found, _ = find_patterns( 87 | parsed_args.sample, 88 | configuration=parsed_args.config, 89 | spacy_language_model_name=parsed_args.language) 90 | 91 | LOG.info(f'Patterns found: {patterns_found}') 92 | 93 | except Exception as ex: 94 | LOG.critical(f'Fatal error: {repr(ex)}') 95 | raise ex 96 | 97 | 98 | # 99 | # OS INPUT 100 | # 101 | if __name__ == '__main__': \ 102 | main(sys.argv[1:]) 103 | -------------------------------------------------------------------------------- /setup.py: 
-------------------------------------------------------------------------------- 1 | """ Setup tools (build distribution) module 2 | 3 | This file is part of PatternOmatic. 4 | 5 | Copyright © 2020 Miguel Revuelta Espinosa 6 | 7 | PatternOmatic is free software: you can redistribute it and/or 8 | modify it under the terms of the GNU Lesser General Public License 9 | as published by the Free Software Foundation, either version 3 of 10 | the License, or (at your option) any later version. 11 | 12 | PatternOmatic is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU Lesser General Public License for more details. 16 | 17 | You should have received a copy of the GNU Lesser General Public License 18 | along with PatternOmatic. If not, see . 19 | 20 | """ 21 | import setuptools 22 | 23 | with open("README.md", "r") as fh: 24 | long_description = fh.read() 25 | 26 | setuptools.setup( 27 | name="PatternOmatic", 28 | version="0.2.3", 29 | author="Miguel Revuelta Espinosa", 30 | author_email="revuel22@hotmail.com", 31 | description="AI/NLP (Spacy) Rule Based Matcher pattern finder", 32 | long_description=long_description, 33 | long_description_content_type="text/markdown", 34 | url="https://github.com/revuel/PatternOmatic", 35 | packages=setuptools.find_packages(), 36 | scripts=['scripts/patternomatic.py'], 37 | install_requires=[ 38 | 'spacy==2.3.0' 39 | ], 40 | classifiers=[ 41 | "Programming Language :: Python :: 3", 42 | "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)", 43 | "Operating System :: OS Independent", 44 | ], 45 | python_requires='>=3.7', 46 | ) 47 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/revuel/PatternOmatic/8f95c1c134a14419a11b8cb192144857b40d0b3c/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | """ Unit testing file for API module 2 | 3 | This file is part of PatternOmatic. 4 | 5 | Copyright © 2020 Miguel Revuelta Espinosa 6 | 7 | PatternOmatic is free software: you can redistribute it and/or 8 | modify it under the terms of the GNU Lesser General Public License 9 | as published by the Free Software Foundation, either version 3 of 10 | the License, or (at your option) any later version. 11 | 12 | PatternOmatic is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU Lesser General Public License for more details. 16 | 17 | You should have received a copy of the GNU Lesser General Public License 18 | along with PatternOmatic. If not, see <https://www.gnu.org/licenses/>. 
19 | 20 | """ 21 | import os 22 | import spacy 23 | from unittest import TestCase, mock 24 | from PatternOmatic.api import find_patterns 25 | from PatternOmatic.settings.config import Config 26 | from PatternOmatic.settings.log import LOG 27 | 28 | 29 | class Test(TestCase): 30 | 31 | my_samples = ['Hello world!', 'Goodbye world!'] 32 | 33 | def test_find_patterns_when_only_samples_provided(self): 34 | """ Tests that find_patterns keeps working when only samples are provided """ 35 | patterns, _ = find_patterns(self.my_samples) 36 | super().assertEqual(4, len(patterns)) 37 | 38 | def test_find_patterns_when_valid_configuration_file_provided(self): 39 | """ Checks that providing a valid configuration file path loads configuration from that file """ 40 | 41 | config_file_path = \ 42 | os.path.join(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir), 'config.ini') 43 | _ = find_patterns(self.my_samples, configuration=config_file_path) 44 | super().assertEqual(config_file_path, Config().file_path) 45 | 46 | def test_find_patterns_when_config_instance_provided(self): 47 | """ Checks that setting up a Config instance before invoking find_patterns works """ 48 | config = Config() 49 | config.max_runs = 10 50 | patterns, _ = find_patterns(self.my_samples) 51 | super().assertEqual(10, len(patterns)) 52 | 53 | def test_find_patterns_when_bad_language_provided(self): 54 | """ Checks that providing an imaginary language model makes find_patterns use en_core_web_sm """ 55 | with super().assertLogs(LOG) as cm: 56 | bad_model = 'Something' 57 | _ = find_patterns(self.my_samples, spacy_language_model_name=bad_model) 58 | super().assertEqual(f'WARNING:PatternOmatic:Model {bad_model} not found, falling back to ' 59 | f'patternOmatic\'s default language model: en_core_web_sm', cm.output[1]) 60 | 61 | def test_installs_en_core_web_sm_if_not_found(self): 62 | """ Due to questionable PyPI security policies, check en_core_web_sm installation is fired if not present """ 
63 | nlp = spacy.load('en_core_web_sm') 64 | 65 | with mock.patch('PatternOmatic.api.pkg_resources.working_set') as patch_working_set: 66 | with mock.patch('PatternOmatic.api.spacy_download') as patch_spacy_download: 67 | with mock.patch('PatternOmatic.api.spacy_load') as patch_spacy_load: 68 | patch_working_set.return_value = [] 69 | patch_spacy_download.return_value = 'I\'ve been fired' 70 | patch_spacy_load.return_value = nlp 71 | find_patterns(['Hi']) 72 | super().assertTrue(patch_spacy_download.called) 73 | 74 | def tearDown(self) -> None: 75 | """ Destroy Config instance """ 76 | Config.clear_instance() 77 | -------------------------------------------------------------------------------- /tests/test_bnf.py: -------------------------------------------------------------------------------- 1 | """ Unit testing file for BNF module 2 | 3 | This file is part of PatternOmatic. 4 | 5 | Copyright © 2020 Miguel Revuelta Espinosa 6 | 7 | PatternOmatic is free software: you can redistribute it and/or 8 | modify it under the terms of the GNU Lesser General Public License 9 | as published by the Free Software Foundation, either version 3 of 10 | the License, or (at your option) any later version. 11 | 12 | PatternOmatic is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU Lesser General Public License for more details. 16 | 17 | You should have received a copy of the GNU Lesser General Public License 18 | along with PatternOmatic. If not, see . 
19 | 20 | """ 21 | import unittest 22 | import spacy 23 | from spacy.tokens.doc import Underscore 24 | 25 | import PatternOmatic.nlp.bnf as bnf 26 | from PatternOmatic.settings.literals import S, P, T, F, OP, NEGATION, ZERO_OR_ONE, ZERO_OR_MORE, ONE_OR_MORE, XPS, IN,\ 27 | NOT_IN, EQQ, GEQ, LEQ, GTH, LTH, TOKEN_WILDCARD, UNDERSCORE, ORTH, TEXT, LOWER, POS, TAG, DEP, LEMMA, SHAPE, \ 28 | IS_ASCII, IS_UPPER, HAS_VECTOR 29 | from PatternOmatic.settings.config import Config 30 | 31 | 32 | class TestDG(unittest.TestCase): 33 | """ Test class for Dynamic Grammar """ 34 | 35 | nlp = spacy.load('en_core_web_sm') 36 | samples = [nlp(u'This is a test.'), nlp(u'Checks for Backus Naur Form grammars')] 37 | config = None 38 | 39 | def test_basic_grammar_dg(self): 40 | """ Tests that basic grammar is correctly generated """ 41 | grammar = bnf.dynamic_generator(self.samples) 42 | 43 | super().assertIn(P, grammar.keys()) 44 | super().assertIn(S, grammar.keys()) 45 | super().assertIn(T, grammar.keys()) 46 | super().assertIn(F, grammar.keys()) 47 | super().assertEqual(len(grammar[SHAPE]), 7) 48 | super().assertEqual(len(grammar[F]), 9) 49 | 50 | def test_basic_grammar_without_uniques_dg(self): 51 | """ Tests that basic grammar is correctly generated when use uniques is false """ 52 | self.config.use_uniques = False 53 | grammar = bnf.dynamic_generator(self.samples) 54 | 55 | super().assertEqual(len(grammar[SHAPE]), 11) 56 | 57 | def test_basic_grammar_with_booleans_dg(self): 58 | """ Tests that basic grammar with booleans is correctly generated """ 59 | self.config.use_boolean_features = True 60 | grammar = bnf.dynamic_generator(self.samples) 61 | 62 | super().assertIn(IS_ASCII, grammar.keys()) 63 | super().assertIn(IS_UPPER, grammar.keys()) 64 | 65 | def test_basic_grammar_with_booleans_and_operators_dg(self): 66 | """ Tests that basic grammar with boolean features and operators is correctly generated """ 67 | self.config.use_boolean_features = True 68 | 
self.config.use_grammar_operators = True 69 | 70 | grammar = bnf.dynamic_generator(self.samples) 71 | 72 | super().assertIn(IS_ASCII, grammar.keys()) 73 | super().assertIn(IS_UPPER, grammar.keys()) 74 | super().assertIn(OP, grammar.keys()) 75 | super().assertListEqual(grammar[OP], [NEGATION, ZERO_OR_ONE, ONE_OR_MORE, ZERO_OR_MORE]) 76 | 77 | def test_basic_grammar_with_booleans_and_extended_pattern_syntax_dg(self): 78 | """ Tests that basic grammar with boolean features and extended pattern syntax is correctly generated """ 79 | self.config.use_boolean_features = True 80 | self.config.use_extended_pattern_syntax = True 81 | 82 | grammar = bnf.dynamic_generator(self.samples) 83 | 84 | super().assertIn(IS_ASCII, grammar.keys()) 85 | super().assertIn(IS_UPPER, grammar.keys()) 86 | super().assertIn(XPS, grammar.keys()) 87 | super().assertListEqual(grammar[XPS], [IN, NOT_IN, EQQ, GEQ, LEQ, GTH, LTH]) 88 | 89 | def test_basic_grammar_with_booleans_and_custom_attributes_dg(self): 90 | """ Tests that basic grammar with boolean features and custom attributes is correctly generated """ 91 | self.config.use_boolean_features = True 92 | self.config.use_custom_attributes = True 93 | 94 | grammar = bnf.dynamic_generator(self.samples) 95 | 96 | super().assertIn(IS_ASCII, grammar.keys()) 97 | super().assertIn(IS_UPPER, grammar.keys()) 98 | super().assertIn(UNDERSCORE, grammar.keys()) 99 | # super().assertIn(IS_SENT_START, grammar.keys()) 100 | super().assertIn(HAS_VECTOR, grammar.keys()) 101 | 102 | def test_basic_grammar_with_token_wildcard_dg(self): 103 | """ Tests grammar is generated with token wildcard """ 104 | self.config.use_token_wildcard = True 105 | 106 | grammar = bnf.dynamic_generator(self.samples) 107 | 108 | super().assertIn(TOKEN_WILDCARD, grammar[T]) 109 | 110 | def test_get_features_per_token(self): 111 | """ Tests that the number of features per token is properly set given different configurations """ 112 | features_dict = {ORTH: None, TEXT: None, LOWER: None, 
POS: None, TAG: None, LEMMA: None} 113 | len_features_dict = len(features_dict.keys()) 114 | 115 | # When features_per_token is equal or lower to 0, the maximum number of features per token is set 116 | self.config.features_per_token = 0 117 | super().assertEqual(len_features_dict, bnf._get_features_per_token(features_dict)) 118 | self.config.features_per_token = -100 119 | super().assertEqual(len_features_dict, bnf._get_features_per_token(features_dict)) 120 | 121 | # When features_per_token is greater than the actual features, the maximum number of features per token is set 122 | self.config.features_per_token = 100 123 | super().assertEqual(len_features_dict, bnf._get_features_per_token(features_dict)) 124 | 125 | # When features_per_token is inside the range (0, actual features), the config parameter is respected 126 | self.config.features_per_token = 3 127 | super().assertEqual(3, bnf._get_features_per_token(features_dict)) 128 | 129 | def test_symbol_stacker(self): 130 | """ Tests that symbols are stacked properly """ 131 | expected_1 = [DEP, DEP + ',' + DEP, DEP + ',' + DEP + ',' + DEP] 132 | super().assertListEqual(expected_1, bnf._symbol_stacker(DEP, 3)) 133 | 134 | expected_2 = [DEP + ',' + DEP, 135 | DEP + ',' + DEP + ',' + DEP, 136 | DEP + ',' + DEP + ',' + DEP + ',' + DEP] 137 | 138 | super().assertListEqual(expected_2, bnf._symbol_stacker(DEP, 4, 2)) 139 | 140 | expected_2.insert(0, DEP) 141 | 142 | super().assertListEqual(expected_2, bnf._symbol_stacker(DEP, 4, 5)) 143 | 144 | super().assertListEqual([expected_1[2]], bnf._symbol_stacker(DEP, 3, 3)) 145 | 146 | # 147 | # Helpers 148 | # 149 | def setUp(self) -> None: 150 | """ Fresh Config instance """ 151 | self.config = Config() 152 | 153 | def tearDown(self) -> None: 154 | """ Destroy Config instance, reset Underscore's token extensions """ 155 | Config.clear_instance() 156 | Underscore.token_extensions = {} 157 | 158 | 159 | if __name__ == "__main__": 160 | unittest.main() 161 | 
-------------------------------------------------------------------------------- /tests/test_individual.py: -------------------------------------------------------------------------------- 1 | """ Unit testing module for GE Individual module 2 | 3 | This file is part of PatternOmatic. 4 | 5 | Copyright © 2020 Miguel Revuelta Espinosa 6 | 7 | PatternOmatic is free software: you can redistribute it and/or 8 | modify it under the terms of the GNU Lesser General Public License 9 | as published by the Free Software Foundation, either version 3 of 10 | the License, or (at your option) any later version. 11 | 12 | PatternOmatic is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU Lesser General Public License for more details. 16 | 17 | You should have received a copy of the GNU Lesser General Public License 18 | along with PatternOmatic. If not, see . 19 | 20 | """ 21 | import unittest 22 | import spacy 23 | 24 | from PatternOmatic.ge.stats import Stats 25 | from PatternOmatic.nlp.bnf import dynamic_generator as dgg 26 | from PatternOmatic.ge.individual import Individual, Fitness 27 | from PatternOmatic.settings.config import Config 28 | from PatternOmatic.settings.literals import FitnessType, S, P, T, F, ORTH, TOKEN_WILDCARD, UNDERSCORE, IS_CURRENCY, \ 29 | NOT_IN, ZERO_OR_MORE, OP, GTH, XPS, IN 30 | 31 | 32 | class TestIndividual(unittest.TestCase): 33 | """ Unit Test class for GE Individual object """ 34 | config = Config() 35 | 36 | nlp = spacy.load("en_core_web_sm") 37 | 38 | samples = [nlp(u'I am a raccoon!'), 39 | nlp(u'You are a cat!'), 40 | nlp(u'Is she a rabbit?'), 41 | nlp(u'This is a test')] 42 | 43 | grammar = dgg(samples) 44 | 45 | stats = Stats() 46 | 47 | def test_init(self): 48 | """ Test that Individual instantiation works """ 49 | i = Individual(self.samples, self.grammar, self.stats) 50 | 
super().assertIs(type(i), Individual) 51 | 52 | def test_init_with_dna(self): 53 | """ Test that Individual instantiation works when providing dna""" 54 | i = Individual(self.samples, self.grammar, self.stats, '10101010101010101010101010101010') 55 | super().assertNotEqual(i, None) 56 | 57 | def test_transcription(self): 58 | """ Check for transcription idempotency """ 59 | self.config.mutation_probability = 0.0 60 | i = Individual(self.samples, self.grammar, self.stats, '11111111') 61 | i._transcription() 62 | i._transcription() 63 | i._transcription() 64 | 65 | super().assertListEqual(i.int_genotype, [127, 1]) 66 | 67 | def test_translation(self): 68 | """ Check for translation idempotency """ 69 | self.config.mutation_probability = 0.0 70 | i = Individual(self.samples, self.grammar, self.stats, '11111111') 71 | i._translation() 72 | i._translation() 73 | i._translation() 74 | super().assertListEqual( 75 | i.fenotype, [{'TEXT': 'am'}, {'TEXT': '?'}, {'TEXT': 'am'}, {'TEXT': '?'}, {'TEXT': 'am'}]) 76 | 77 | def test_mutation(self): 78 | """ Checks that mutation works """ 79 | self.config.mutation_probability = 1.0 80 | i = Individual(self.samples, self.grammar, self.stats, '11111111') 81 | super().assertNotEqual(i.bin_genotype, '11111111') 82 | 83 | def test_fitness_basic(self): 84 | """ Fitness "basic" sets fitness """ 85 | self.config.mutation_probability = 0.0 86 | self.config.fitness_function_type = FitnessType.BASIC 87 | i = Individual(self.samples, self.grammar, self.stats, '01110101100101100110010110010101') 88 | 89 | super().assertEqual(i.fitness_value, 0.25) 90 | 91 | def test_fitness_full_match(self): 92 | """ Fitness "full match" sets fitness """ 93 | self.config.mutation_probability = 0.0 94 | self.config.fitness_function_type = FitnessType.FULL_MATCH 95 | i = Individual(self.samples, self.grammar, self.stats, '01101010100001101000110111000100') 96 | 97 | super().assertEqual(i.fitness_value, 0.25) 98 | 99 | def test_token_wildcard_penalty(self): 100 | 
""" Checks that token wildcard penalty is properly set """ 101 | # When using token wildcard, penalty is applied 102 | f = object.__new__(Fitness) 103 | f.fenotype = [{}, {}, {}, 'Whatever'] 104 | self.config.use_token_wildcard = True 105 | f.config = self.config 106 | super().assertEqual(0.25, f._wildcard_penalty(1.0)) 107 | 108 | # When not using token wildcard, penalty is not applied 109 | self.config.use_token_wildcard = False 110 | f.fenotype = 1.0 111 | super().assertEqual(1.0, f._wildcard_penalty(1.0)) 112 | 113 | def test_translate(self): 114 | """ Verifies conversions over the BNF are done correctly """ 115 | i = object.__new__(Individual) 116 | 117 | # Root 118 | i.grammar = {S: [P]} 119 | super().assertEqual('"S":"

"', i._translate(0, S, S)) 120 | 121 | # Pattern root symbol to Token symbol 122 | i.grammar = {P: [T]} 123 | super().assertEqual(T, i._translate(0, P, P)) 124 | 125 | # Token symbol to Feature symbol inside Token 126 | i.grammar = {T: [F]} 127 | super().assertEqual('{}', i._translate(0, T, T)) 128 | 129 | # Token symbol to wildcard 130 | i.grammar = {T: [TOKEN_WILDCARD]} 131 | super().assertEqual('{}', i._translate(0, T, T)) 132 | 133 | # Feature symbol to specific symbol 134 | i.grammar = {F: [ORTH]} 135 | super().assertEqual('{}', i._translate(0, F, '{}')) 136 | 137 | # Basic Terminal conversion 138 | i.grammar = {ORTH: ['Test']} 139 | super().assertEqual('{"ORTH":"Test"}', i._translate(0, ORTH, '{}')) 140 | 141 | # Underscore conversion 142 | i.grammar = {UNDERSCORE: [IS_CURRENCY]} 143 | super().assertEqual('{"_": {}}', i._translate(0, UNDERSCORE, '{}')) 144 | 145 | # Underscore terminal conversion 146 | i.grammar = {IS_CURRENCY: [True]} 147 | super().assertEqual('{"_": {"CUSTOM_IS_CURRENCY":"True"}}', 148 | i._translate(0, IS_CURRENCY, '{"_": {}}')) 149 | 150 | # Grammar Operators conversion 151 | i.grammar = {OP: ZERO_OR_MORE} 152 | super().assertEqual('"OP":"*"', i._translate(0, OP, '')) 153 | 154 | # Extended Pattern Syntax conversion (base) 155 | i.grammar = {XPS: [IN]} 156 | super().assertEqual('{}', i._translate(0, XPS, '')) 157 | 158 | i.grammar = {ORTH: [XPS]} 159 | super().assertEqual('"ORTH":', i._translate(0, ORTH, '')) 160 | 161 | # Extended Pattern Syntax conversion (terminal logical) 162 | i.grammar = {NOT_IN: [['Test']]} 163 | super().assertEqual('{"ORTH": {"NOT_IN":["Test"]}}', i._translate(0, NOT_IN, '{"ORTH": {}}')) 164 | 165 | # Extended Pattern Syntax (terminal arithmetical) 166 | i.grammar = {GTH: [5]} 167 | super().assertEqual('{"LENGTH": {">":5}}', i._translate(0, GTH, '{"LENGTH": {}}')) 168 | 169 | # 170 | # Helpers 171 | # 172 | def setUp(self) -> None: 173 | """ Fresh Config instance """ 174 | self.config = Config() 175 | 176 | def 
tearDown(self) -> None: 177 | """ Destroy Config instance """ 178 | Config.clear_instance() 179 | 180 | 181 | if __name__ == "__main__": 182 | unittest.main() 183 | -------------------------------------------------------------------------------- /tests/test_population.py: -------------------------------------------------------------------------------- 1 | """ Unit testing module for GE Population module 2 | 3 | This file is part of PatternOmatic. 4 | 5 | Copyright © 2020 Miguel Revuelta Espinosa 6 | 7 | PatternOmatic is free software: you can redistribute it and/or 8 | modify it under the terms of the GNU Lesser General Public License 9 | as published by the Free Software Foundation, either version 3 of 10 | the License, or (at your option) any later version. 11 | 12 | PatternOmatic is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU Lesser General Public License for more details. 16 | 17 | You should have received a copy of the GNU Lesser General Public License 18 | along with PatternOmatic. If not, see . 
19 | 20 | """ 21 | import unittest 22 | import spacy 23 | 24 | from PatternOmatic.ge.stats import Stats 25 | from PatternOmatic.nlp.bnf import dynamic_generator as dgg 26 | from PatternOmatic.ge.population import Population, Selection, Recombination, Replacement 27 | from PatternOmatic.ge.individual import Individual 28 | from PatternOmatic.settings.config import Config 29 | from PatternOmatic.settings.literals import FitnessType, SelectionType, RecombinationType, ReplacementType 30 | 31 | 32 | class BasePopulationTest(unittest.TestCase): 33 | """ Base class to supply shared attributes and helpers """ 34 | # 35 | # Shared attributes 36 | # 37 | config = Config() 38 | 39 | nlp = spacy.load("en_core_web_sm") 40 | 41 | samples = [nlp(u'I am a raccoon!'), 42 | nlp(u'You are a cat!'), 43 | nlp(u'Is she a rabbit?'), 44 | nlp(u'This is a test')] 45 | 46 | grammar = dgg(samples) 47 | 48 | stats = Stats() 49 | 50 | # 51 | # Helpers 52 | # 53 | def setUp(self) -> None: 54 | """ Fresh Config instance """ 55 | self.config = Config() 56 | 57 | def tearDown(self) -> None: 58 | """ Destroy Config instance """ 59 | Config.clear_instance() 60 | 61 | 62 | class TestPopulation(BasePopulationTest): 63 | """ Unit Test class for GE Population object """ 64 | 65 | def test_initialize(self): 66 | """ Tests that a population is correctly filled with Individuals """ 67 | p = Population(self.samples, self.grammar, self.stats) 68 | 69 | super().assertIsInstance(p.generation[0], Individual) 70 | 71 | def test_best_challenge(self): 72 | """ Tests that the most fitted individual occupies the population's best_individual slot """ 73 | self.config.max_generations = 3 74 | self.config.fitness_function_type = FitnessType.BASIC 75 | p = Population(self.samples, self.grammar, self.stats) 76 | self.config.mutation_probability = 0.0 77 | p.generation[0] = Individual(self.samples, self.grammar, self.stats, '01110101100101100110010110010101') 78 | self.config.mutation_probability = 0.5 79 | p.evolve() 80 | 
81 | super().assertGreaterEqual(p.best_individual.fitness_value, 0.2) 82 | 83 | def test_binary_tournament(self): 84 | """ Test that binary tournament works as expected """ 85 | self.config.max_generations = 3 86 | self.config.fitness_function_type = FitnessType.FULL_MATCH 87 | self.config.selection_type = SelectionType.BINARY_TOURNAMENT 88 | p = Population(self.samples, self.grammar, self.stats) 89 | mating_pool = p.selection(p.generation) 90 | 91 | super().assertNotEqual(p.generation, mating_pool) 92 | 93 | def test_k_tournament(self): 94 | """ Test that k tournament raises error """ 95 | self.config.selection_type = SelectionType.K_TOURNAMENT 96 | p = Population(self.samples, self.grammar, self.stats) 97 | with super().assertRaises(NotImplementedError): 98 | _ = p.selection(p.generation) 99 | 100 | def test_random_one_point_crossover(self): 101 | """ Test that crossover 'random one point' works as expected """ 102 | self.config.max_generations = 3 103 | self.config.fitness_function_type = FitnessType.BASIC 104 | self.config.selection_type = SelectionType.BINARY_TOURNAMENT 105 | self.config.recombination_type = RecombinationType.RANDOM_ONE_POINT_CROSSOVER 106 | p = Population(self.samples, self.grammar, self.stats) 107 | mating_pool = p.selection(p.generation) 108 | p.offspring = p.recombination(mating_pool, p.generation) 109 | super().assertNotEqual(p.generation, p.offspring) 110 | 111 | def test_mu_plus_lambda(self): 112 | """ Tests that replacement 'mu plus lambda' works as expected """ 113 | self.config.replacement_type = ReplacementType.MU_PLUS_LAMBDA 114 | p = Population(self.samples, self.grammar, self.stats) 115 | mating_pool = p.selection(p.generation) 116 | p.offspring = p.recombination(mating_pool, p.generation) 117 | p.generation, p.offspring = p.replacement(p.generation, p.offspring) 118 | super().assertListEqual(p.offspring, []) 119 | 120 | def test_mu_lambda_elite(self): 121 | """ Tests that replacement 'mu lambda with elitism' works as expected 
""" 122 | self.config.replacement_type = ReplacementType.MU_LAMBDA_WITH_ELITISM 123 | p = Population(self.samples, self.grammar, self.stats) 124 | mating_pool = p.selection(p.generation) 125 | p.offspring = p.recombination(mating_pool, p.generation) 126 | p.generation, p.offspring = p.replacement(p.generation, p.offspring) 127 | super().assertListEqual(p.offspring, []) 128 | 129 | def test_mu_lambda_no_elite(self): 130 | """ Tests that replacement 'mu lambda without elitism' works as expected """ 131 | self.config.replacement_type = ReplacementType.MU_LAMBDA_WITHOUT_ELITISM 132 | p = Population(self.samples, self.grammar, self.stats) 133 | mating_pool = p.selection(p.generation) 134 | p.offspring = p.recombination(mating_pool, p.generation) 135 | p.generation, p.offspring = p.replacement(p.generation, p.offspring) 136 | super().assertListEqual(p.offspring, []) 137 | 138 | def test_evolve(self): 139 | """ Tests that an evolution works, preserving a fitted individual """ 140 | self.config.max_generations = 3 141 | self.config.fitness_function_type = FitnessType.BASIC 142 | p = Population(self.samples, self.grammar, self.stats) 143 | self.config.mutation_probability = 0.0 144 | p.generation[0] = Individual(self.samples, self.grammar, self.stats, '01110101100101100110010110010101') 145 | self.config.mutation_probability = 0.5 146 | p.evolve() 147 | super().assertLessEqual(0.25, p.generation[0].fitness_value) 148 | 149 | def test_best_challenge_changes_best_individual(self): 150 | """ Covers best challenge cases """ 151 | self.config.mutation_probability = 0.0 152 | self.config.fitness_function_type = FitnessType.BASIC 153 | 154 | p = Population(self.samples, self.grammar, self.stats) 155 | i1 = Individual(self.samples, self.grammar, self.stats, dna='00000000000000000000000000000000') 156 | i2 = Individual(self.samples, self.grammar, self.stats, dna='01110101100101100110010110010101') 157 | 158 | # When there's no best individual yet, population's best individual is 
updated 159 | p.best_individual = None 160 | p.generation = [i2] 161 | p._best_challenge() 162 | 163 | super().assertEqual(p.best_individual, p.generation[0]) 164 | 165 | # When a better individual is better fitted in a new generation, population's best individual is updated 166 | p.best_individual = i1 167 | p.generation = [i2] 168 | p._best_challenge() 169 | 170 | super().assertEqual(p.best_individual, p.generation[0]) 171 | 172 | # When a worse individual is the most fitted in a new generation, population's best individual remains the same 173 | p.best_individual = i2 174 | p.generation = [i1] 175 | p._best_challenge() 176 | 177 | super().assertEqual(i2, p.best_individual) 178 | 179 | def test_sr_update(self): 180 | """ Check SR is updated if a solution is found for the run """ 181 | stats = Stats() 182 | 183 | self.config.max_generations = 1 184 | self.config.population_size = 3 185 | self.config.fitness_function_type = FitnessType.BASIC 186 | self.config.mutation_probability = 0.0 187 | 188 | self.config.success_threshold = 0.0 189 | p = Population(self.samples, self.grammar, stats) 190 | p.generation[0] = Individual(self.samples, self.grammar, stats, '01110101100101100110010110010101') 191 | p.evolve() 192 | super().assertListEqual([True], stats.success_rate_accumulator) 193 | 194 | self.config.success_threshold = 1.0 195 | self.config.population_size = 1 196 | p = Population(self.samples, self.grammar, stats) 197 | p.generation[0] = Individual(self.samples, self.grammar, stats, '00000000000000000000000000000000') 198 | p.evolve() 199 | super().assertListEqual([True, False], stats.success_rate_accumulator) 200 | 201 | 202 | class TestSelection(BasePopulationTest): 203 | """ Unit Test class for GE Selection object """ 204 | 205 | def test_dispatch(self): 206 | """ Dispatcher method provides the proper selection method """ 207 | selection = Selection(SelectionType.BINARY_TOURNAMENT) 208 | super().assertIs(selection._select, Selection._binary_tournament) 209 | 
class TestSelection(BasePopulationTest):
    """ Unit tests for the GE Selection dispatcher """

    def test_dispatch(self):
        """ Each SelectionType maps to its selection method; an unknown type maps to binary tournament """
        expectations = (
            (SelectionType.BINARY_TOURNAMENT, Selection._binary_tournament),
            (SelectionType.K_TOURNAMENT, Selection._k_tournament),
            (None, Selection._binary_tournament),  # unknown SelectionType
        )

        for selection_type, expected_method in expectations:
            self.assertIs(Selection(selection_type)._select, expected_method)


class TestRecombination(BasePopulationTest):
    """ Unit tests for the GE Recombination dispatcher """

    def test_dispatch(self):
        """ The dispatched recombine method is the random one point crossover """
        recombiner = Recombination(self.grammar, self.samples, self.stats)
        self.assertEqual(recombiner._recombine, recombiner._random_one_point_crossover)


class TestReplacement(BasePopulationTest):
    """ Unit tests for the GE Replacement dispatcher """

    def test_dispatch(self):
        """ Each ReplacementType maps to its replacement method; an unknown type maps to mu plus lambda """
        expectations = (
            (ReplacementType.MU_PLUS_LAMBDA, Replacement._mu_plus_lambda),
            (ReplacementType.MU_LAMBDA_WITH_ELITISM, Replacement._mu_lambda_elite),
            (ReplacementType.MU_LAMBDA_WITHOUT_ELITISM, Replacement._mu_lambda_no_elite),
            (None, Replacement._mu_plus_lambda),  # unknown ReplacementType
        )

        for replacement_type, expected_method in expectations:
            self.assertIs(Replacement(replacement_type)._replace, expected_method)


if __name__ == "__main__":
    unittest.main()
class TestPatternomaticScript(TestCase):
    """ Test class to verify patternomatic.py correct behaviour """

    nlp = spacy_load('en_core_web_sm')

    samples = [nlp(u'My shirt is white'),
               nlp(u'My cat is black'),
               nlp(u'Your home is comfortable'),
               nlp(u'Their attitude is great')]

    # config.ini lives one directory above this test module
    config_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'config.ini')

    full_args = ['-s', 'Hello', '-s', 'Goodbye', '-c', config_file_path, '-l', 'en_core_web_sm']

    def test_main(self):
        """ Checks that main runs with a full argument list and logs the best individuals header """
        with self.assertLogs(LOG) as cm:
            pom.main(self.full_args)

        self.assertIn('INFO:PatternOmatic:Best individuals for this execution:', cm.output)

    def test_main_errors_raised(self):
        """ Checks that main raises errors when bad arguments are supplied """
        # No args
        with self.assertRaises(SystemExit):
            pom.main([])

        # Wrong args
        with self.assertRaises(SystemExit):
            pom.main(['-k'])

        # Wrong lang: swap the trailing language model argument for an unknown one
        bad_model = 'bad_model'
        args = self.full_args[:-1] + [bad_model]

        with self.assertLogs(LOG) as cm:
            pom.main(args)

        self.assertEqual(f'WARNING:PatternOmatic:Model {bad_model} not found, falling back to '
                         f'patternOmatic\'s default language model: en_core_web_sm', cm.output[2])

        # Fatal error
        with mock.patch('scripts.patternomatic.ArgumentParser') as mock_arg_parser:
            # side_effect makes the patched constructor itself raise; return_value would only
            # hand back an Exception instance without raising anything at the call site
            mock_arg_parser.side_effect = Exception('Mocked exception')

            with self.assertRaises(Exception):
                pom.main(self.full_args)

    def test_patternomatic_script(self):
        """ Checks that patternomatic can be run as a script properly """
        # Imported locally so this test does not depend on the module's import block
        import subprocess
        import sys

        script_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), os.pardir, 'scripts', 'patternomatic.py')

        # Run with the interpreter executing the tests and an argument list (no shell), so the
        # check neither depends on a 'python' executable on PATH nor breaks on paths with spaces
        completed = subprocess.run([sys.executable, script_path, '-s', 'Hello', '-s', 'Goodbye'], check=False)

        self.assertEqual(0, completed.returncode)
class TestConfig(unittest.TestCase):
    """ Test class for the Config settings object """

    config = None

    #
    # Helpers
    #
    def setUp(self) -> None:
        """ Every test starts from a Config instance """
        self.config = Config()

    def tearDown(self) -> None:
        """ Every test ends by clearing the Config instance """
        Config.clear_instance()

    #
    # Tests
    #
    def test_config_is_singleton(self):
        """ Instantiating Config again yields an instance equal to the existing one """
        second_config = Config()
        self.assertEqual(self.config, second_config)

    def test_config_is_clearable(self):
        """ After clearing, a new Config differs from the previous instance """
        Config.clear_instance()
        renewed_config = Config()

        self.assertNotEqual(self.config, renewed_config)

    def test_config_read_from_path(self):
        """ Checks file_path when no file, a correct file and a bad path are supplied """
        # No config file provided
        self.assertEqual(None, self.config.file_path)

        # Correct config file provided
        tests_dir = os.path.dirname(os.path.abspath(__file__))
        file_path = os.path.join(tests_dir, os.pardir, 'config.ini')
        Config.clear_instance()
        self.config = Config(file_path)
        self.assertEqual(file_path, self.config.file_path)

        # Bad path provided
        Config.clear_instance()
        self.config = Config('')
        self.assertEqual(None, self.config.file_path)

    def test_xps_gop_can_not_be_enabled_together(self):
        """ Grammar operators and extended pattern syntax never end up enabled at the same time """
        settings = Config()

        # Requesting both leaves the two flags with different values
        settings.use_grammar_operators = True
        settings.use_extended_pattern_syntax = True
        self.assertNotEqual(settings.use_grammar_operators, settings.use_extended_pattern_syntax)

        # Extended pattern syntax can be enabled once grammar operators are off
        settings.use_grammar_operators = False
        settings.use_extended_pattern_syntax = True
        self.assertEqual(True, settings.use_extended_pattern_syntax)

        # Enabling grammar operators switches extended pattern syntax off
        settings.use_grammar_operators = True
        self.assertEqual(False, settings.use_extended_pattern_syntax)

    def test_setting_config_attribute_with_wrong_type_has_no_effect(self):
        """ Assigning a value of the wrong type to a config attribute does not store that value """
        settings = Config()

        settings.max_runs = 0.5
        settings.use_extended_pattern_syntax = None
        settings.fitness_function_type = RecombinationType.RANDOM_ONE_POINT_CROSSOVER
        settings.report_path = 0

        self.assertNotEqual(settings.max_runs, 0.5)
        self.assertNotEqual(settings.use_extended_pattern_syntax, None)
        self.assertNotEqual(settings.fitness_function_type, RecombinationType.RANDOM_ONE_POINT_CROSSOVER)
        self.assertNotEqual(settings.report_path, 0)

    def test_validate_config_argument(self):
        """ Checks that config arguments are fetched according to the type of the supplied default """
        test_section = 'test_section'
        test_option_int = 'test_option_int'
        test_option_float = 'test_option_float'
        test_option_boolean = 'test_option_boolean'
        test_option_string = 'test_option_string'

        config_parser = configparser.ConfigParser()
        config_parser.add_section(test_section)

        section = config_parser[test_section]
        section[test_option_int] = '0'
        section[test_option_float] = '0.0'
        section[test_option_boolean] = 'False'
        section[test_option_string] = ''

        # With valid types: (expected value, option name, default whose type drives the parsing)
        valid_cases = (
            (0, test_option_int, 1),
            (.0, test_option_float, .1),
            (False, test_option_boolean, True),
            ('', test_option_string, 'Whatever'),
        )
        for expected, option, default in valid_cases:
            self.assertEqual(
                expected, self.config._validate_config_argument(test_section, option, default, config_parser))

        # With wrong type: the stored text is not an int, so the supplied default comes back
        section[test_option_int] = 'False'
        self.assertEqual(
            1, self.config._validate_config_argument(test_section, test_option_int, 1, config_parser))

        # With not even a possible type used by the config parser
        self.assertEqual(
            {}, self.config._validate_config_argument(test_section, test_option_int, {}, config_parser))
class TestStats(TestCase):
    """ Tests for the accumulators, metrics, representations and report persistence of Stats """

    stats = None
    test_report_path_file = 'test_report_path_file.txt'
    fitness_value_literal = 'fitness_value'

    #
    # Helpers
    #
    def setUp(self) -> None:
        """ Fresh Stats instance and no report file left over from a previous test """
        self.stats = Stats()
        self._discard_report_file()

    @classmethod
    def tearDownClass(cls) -> None:
        """ Remove temporary report file """
        cls._discard_report_file()

    @classmethod
    def _discard_report_file(cls) -> None:
        """ Deletes the temporary report file when it exists """
        if os.path.exists(cls.test_report_path_file):
            os.remove(cls.test_report_path_file)

    def _bare_individual(self, fitness):
        """ Builds an Individual without running its initializer, carrying only a fitness value """
        individual = object.__new__(Individual)
        setattr(individual, self.fitness_value_literal, fitness)
        return individual

    def _report_lines(self):
        """ Returns the lines currently stored in the temporary report file """
        with open(self.test_report_path_file, 'r') as persisted_report:
            return persisted_report.readlines()

    #
    # Tests
    #
    def test_add_sr(self):
        """ A success flag lands in the success rate accumulator """
        self.stats.add_sr(True)
        self.assertListEqual([True], self.stats.success_rate_accumulator)

    def test_add_mbf(self):
        """ A fitness value lands in the MBF accumulator """
        self.stats.add_mbf(0.5)
        self.assertListEqual([0.5], self.stats.mbf_accumulator)

    def test_add_aes(self):
        """ A count lands in the AES accumulator """
        self.stats.add_aes(10)
        self.assertListEqual([10], self.stats.aes_accumulator)

    def test_add_time(self):
        """ A time measure lands in the time accumulator """
        self.stats.add_time(0.2222)
        self.assertListEqual([0.2222], self.stats.time_accumulator)

    def test_add_most_fitted(self):
        """ An individual lands in the most fitted accumulator """
        expected = self._bare_individual(0.5)

        self.stats.add_most_fitted(expected)
        self.assertListEqual([expected], self.stats.most_fitted_accumulator)

    def test_sum_aes(self):
        """ The AES counter adds up successive amounts """
        for _ in range(2):
            self.stats.sum_aes(2)

        self.assertEqual(4, self.stats.aes_counter)

    def test_reset(self):
        """ Reset puts the AES counter back to 0 and the solution found flag back to False """
        self.stats.aes_counter = 100
        self.stats.solution_found = True

        self.stats.reset()

        self.assertEqual(0, self.stats.aes_counter)
        self.assertEqual(False, self.stats.solution_found)

    def test_calculate_metrics(self):
        """ Metrics are derived from the accumulators and the AES counter """
        self.stats.success_rate_accumulator = [1, 1, 1]
        self.stats.mbf_accumulator = [2, 2, 2]
        self.stats.aes_counter = 100
        self.stats.time_accumulator = [3, 3, 3]

        self.stats.calculate_metrics()

        self.assertEqual(1, self.stats.success_rate)
        self.assertEqual(2, self.stats.mbf)
        self.assertEqual(100, self.stats.aes)
        self.assertEqual(3, self.stats.mean_time)

    def test_get_most_fitted(self):
        """ The individual with the highest fitness value is picked from the most fitted accumulator """
        low = self._bare_individual(0.01)
        best = self._bare_individual(0.1)
        lowest = self._bare_individual(0.001)

        self.stats.most_fitted_accumulator = [low, best, lowest]

        self.assertEqual(self.stats.get_most_fitted(), best)

    def test_avg(self):
        """ Average implementation works """
        self.assertEqual(2, self.stats.avg([1, 2, 3]))

    def test_dict_and_repr(self):
        """ Checks the dict and repr forms of Stats, without and with a most fitted individual """
        stats_dict = {
            'success_rate': 1.0,
            'mbf': 0.5,
            'aes': 100,
            'mean_time': 4.5,
            'most_fitted': None
        }

        # Without a best individual
        stats = Stats()
        for metric in ('success_rate', 'mbf', 'aes', 'mean_time'):
            setattr(stats, metric, stats_dict[metric])

        self.assertEqual(stats.__dict__, stats_dict)
        self.assertEqual(dict(stats), stats_dict)
        self.assertEqual(f'Stats({repr(stats_dict)})', repr(stats))

        # With an individual in the most fitted accumulator
        fittest = self._bare_individual(1.0)
        stats.most_fitted_accumulator = [fittest]
        stats_dict['most_fitted'] = fittest.__dict__

        self.assertDictEqual(stats_dict, stats.__dict__)
        self.assertEqual(stats_dict, dict(stats))
        self.assertEqual(f'Stats({repr(stats_dict)})', repr(stats))

    def test_persist(self):
        """ Each persist call adds one line with the stats dict to the JSON report file """
        config = Config()
        config.report_format = ReportFormat.JSON
        config.report_path = self.test_report_path_file

        # When a best individual has been found
        self.stats.aes = 100
        self.stats.mbf = 0.9
        self.stats.mean_time = 0.42
        self.stats.success_rate = 1.0
        self.stats.most_fitted_accumulator = [self._bare_individual(1.0)]
        self.stats.persist()

        self.assertEqual(str(dict(self.stats)) + '\n', self._report_lines()[0])

        # When a best individual has not been found
        self.stats.most_fitted_accumulator = []
        self.stats.persist()

        self.assertEqual(str(dict(self.stats)) + '\n', self._report_lines()[1])

    def test_to_csv(self):
        """ Checks the tab separated form of the stats and that it is persisted as a CSV report line """
        # The stats module's time source is patched so the leading timestamp column is predictable
        with mock.patch('PatternOmatic.ge.stats.time') as mock_time:
            mock_time.return_value = .123
            self.stats.aes = 10
            self.stats.mbf = 0.5
            self.stats.mean_time = 0.22
            self.stats.success_rate = 0.5

            # When a best individual has not been found
            csv_stats = \
                f'{.123}\t{self.stats.mbf}\t{self.stats.success_rate}\t{self.stats.aes}\t{self.stats.mean_time}\t' \
                f'{None}\t'

            self.assertEqual(csv_stats, self.stats._to_csv())

            # When a best individual has been found
            fittest = self._bare_individual(1.0)
            self.stats.most_fitted_accumulator = [fittest]

            csv_stats += f'{None}\t{fittest.fitness_value}\t'
            self.assertEqual(csv_stats, self.stats._to_csv())

            # Also check csv is correctly persisted
            config = Config()
            config.report_path = self.test_report_path_file
            config.report_format = ReportFormat.CSV
            self.stats.persist()

            self.assertEqual(csv_stats + '\n', self._report_lines()[0])