.
676 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | #
2 | # PatternOmatic Makefile
3 | #
4 | # This file is part of PatternOmatic.
5 | #
6 | # Copyright © 2020 Miguel Revuelta Espinosa
7 | #
8 | # PatternOmatic is free software: you can redistribute it and/or
9 | # modify it under the terms of the GNU Lesser General Public License
10 | # as published by the Free Software Foundation, either version 3 of
11 | # the License, or (at your option) any later version.
12 | #
13 | # PatternOmatic is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU Lesser General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU Lesser General Public License
19 | # along with PatternOmatic. If not, see .
20 | #
export PYTHONPATH=.

# Every target below is a command alias, not a real file. Declaring them
# .PHONY prevents a file or directory with the same name (notably the build/
# and dist/ folders produced by "python setup.py") from making the target
# look up to date and silently skipping its recipe.
.PHONY: all venv clean libs test coverage sonar sonarcloud build publish run

all: libs coverage clean build sonar

# NOTE(review): each recipe line runs in its own sub-shell, so sourcing the
# virtualenv here cannot affect later targets or the caller's shell — confirm
# this target's intended usage.
venv:
	source venv/bin/activate

# Remove build artifacts and profiling output
clean:
	rm -rf `pwd`/build
	rm -rf `pwd`/dist
	rm -rf `pwd`/PatternOmatic.egg-info
	rm -rf `pwd`/fil-result

# Install runtime dependencies
libs:
	pip install -r requirements.txt

# Run the unit test suite
test:
	python -m unittest

# Run tests under coverage and emit console + XML reports
coverage:
	coverage run --branch --source=PatternOmatic,scripts,tests --omit=*__init__* -m unittest && \
	coverage report --ignore-errors --omit=venv/**,tests/**,*__init__* && \
	coverage xml

# Static analysis against a local SonarQube instance
sonar:
	sonar-scanner -Dsonar.projectKey=pOm -Dsonar.exclusions=tests/**

# Static analysis against SonarCloud
sonarcloud:
	sonar-scanner -Dsonar.projectKey=revuel_PatternOmatic

# Build source and wheel distributions
build:
	python setup.py sdist bdist_wheel

# Upload distributions to PyPI (expects PYPI_TOKEN in the environment)
publish:
	twine upload -u __token__ -p ${PYPI_TOKEN} --repository-url https://upload.pypi.org/legacy/ dist/*

# Demo run of the CLI script
run:
	python ./scripts/patternomatic.py -s Hello Mr. Puffin -s Goodbye Mrs. Muffin
59 |
--------------------------------------------------------------------------------
/PatternOmatic/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/revuel/PatternOmatic/8f95c1c134a14419a11b8cb192144857b40d0b3c/PatternOmatic/__init__.py
--------------------------------------------------------------------------------
/PatternOmatic/api.py:
--------------------------------------------------------------------------------
1 | """ Application Programming Interface module
2 |
3 | This file is part of PatternOmatic.
4 |
5 | Copyright © 2020 Miguel Revuelta Espinosa
6 |
7 | PatternOmatic is free software: you can redistribute it and/or
8 | modify it under the terms of the GNU Lesser General Public License
9 | as published by the Free Software Foundation, either version 3 of
10 | the License, or (at your option) any later version.
11 |
12 | PatternOmatic is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with PatternOmatic. If not, see .
19 |
20 | """
21 | import time
22 | import pkg_resources
23 | from typing import List, Union, Tuple, Any
24 | from spacy import load as spacy_load
25 | from spacy.cli import download as spacy_download
26 |
27 | from PatternOmatic.ge.population import Population
28 | from PatternOmatic.ge.stats import Stats
29 | from PatternOmatic.settings.config import Config
30 | from PatternOmatic.settings.log import LOG
31 | from PatternOmatic.nlp.bnf import dynamic_generator as dgg
32 |
33 |
def find_patterns(
        samples: List[str],
        configuration: Union[str, None] = None,
        spacy_language_model_name: Union[str, None] = None) -> List[Tuple[Any, ...]]:
    """
    Given some samples, this function finds optimized patterns to be used by the Spacy's Rule Based Matcher.
    Args:
        samples: List of strings from where to find common linguistic patterns
        configuration: (str) Optional configuration file path to be loaded (Fallbacks to default configuration)
        spacy_language_model_name: (str) Optional valid Spacy Language Model (Fallbacks to Spacy's en_core_web_sm)

    Returns: List of patterns found and list of each pattern matching score against the samples

    """
    LOG.info(f'Loading language model {spacy_language_model_name}...')

    # Make sure the fallback model is installed before any load attempt
    if 'en-core-web-sm' not in [d.project_name for d in pkg_resources.working_set]:
        LOG.info('PatternOmatic\'s default spaCy\'s Language Model not installed,'
                 ' proceeding to install en_core_web_sm, please wait...')
        spacy_download('en_core_web_sm')

    if spacy_language_model_name is None:
        # spacy_load(None) raises TypeError, not OSError, so the fallback in
        # the except clause below would never trigger; handle the "no model
        # requested" case explicitly instead.
        nlp = spacy_load('en_core_web_sm')
    else:
        try:
            nlp = spacy_load(spacy_language_model_name)
        except OSError:
            LOG.warning(f'Model {spacy_language_model_name} not found, '
                        f'falling back to patternOmatic\'s default language model: en_core_web_sm')
            nlp = spacy_load('en_core_web_sm')

    LOG.info(f'Building Doc instances...')
    samples = [nlp(sample) for sample in samples]

    if isinstance(configuration, str):
        LOG.info(f'Setting up configuration from the following path: {configuration}...')
        config = Config(config_file_path=configuration)
    else:
        config = Config()
    LOG.info(f'Existing Config instance found: {config}')

    stats = Stats()

    # Build a Backus Naur Form grammar out of the given samples
    bnf_g = dgg(samples)

    LOG.info('Starting Execution...')
    for _ in range(0, config.max_runs):
        start = time.monotonic()
        p = Population(samples, bnf_g, stats)
        p.evolve()
        end = time.monotonic()
        stats.add_time(end - start)
        stats.calculate_metrics()

    LOG.info(f'Execution report {stats}')
    stats.persist()

    LOG.info(f'Best individuals for this execution:')
    stats.most_fitted_accumulator.sort(key=lambda i: i.fitness_value, reverse=True)
    for individual in stats.most_fitted_accumulator:
        LOG.info(f'{individual}')

    # Transpose [(fenotype, fitness), ...] into ([fenotypes...], [fitness_values...])
    return list(zip(*[[i.fenotype, i.fitness_value] for i in stats.most_fitted_accumulator]))
94 |
--------------------------------------------------------------------------------
/PatternOmatic/ge/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/revuel/PatternOmatic/8f95c1c134a14419a11b8cb192144857b40d0b3c/PatternOmatic/ge/__init__.py
--------------------------------------------------------------------------------
/PatternOmatic/ge/individual.py:
--------------------------------------------------------------------------------
1 | """ Evolutionary Individual related classes module
2 |
3 | This file is part of PatternOmatic.
4 |
5 | Copyright © 2020 Miguel Revuelta Espinosa
6 |
7 | PatternOmatic is free software: you can redistribute it and/or
8 | modify it under the terms of the GNU Lesser General Public License
9 | as published by the Free Software Foundation, either version 3 of
10 | the License, or (at your option) any later version.
11 |
12 | PatternOmatic is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with PatternOmatic. If not, see .
19 |
20 | """
21 | import re
22 | import json
23 |
24 | from random import random
25 | from itertools import cycle
26 | from spacy.tokens import Doc
27 | from spacy.matcher import Matcher
28 |
29 | from PatternOmatic.ge.stats import Stats
30 | from PatternOmatic.settings.config import Config
31 | from PatternOmatic.settings.log import LOG
32 | from PatternOmatic.settings.literals import FitnessType, S, T, XPS, TOKEN_WILDCARD, UNDERSCORE, P, F, EF, IN, NOT_IN, \
33 | SLD, SRD, GTH, LTH, GEQ, LEQ, EQQ, XPS_AS
34 |
35 |
class Fitness(object):
    """ Dispatches the proper fitness type for individual instances """
    __slots__ = ('_fitness', 'config', 'samples', 'fenotype')

    def __init__(self, config, samples, fenotype):
        """
        Stores the evaluation context and binds the concrete fitness callable
        according to the configured fitness function type
        Args:
            config: Config instance in charge of the current execution
            samples: List of Spacy Doc objects to match the fenotype against
            fenotype: Spacy Rule Based Matcher pattern of the individual
        """
        self.config = config
        self.samples = samples
        self.fenotype = fenotype
        self._dispatch_fitness(self.config.fitness_function_type)

    def __call__(self, *args, **kwargs) -> float:
        """ Evaluates the bound fitness function and returns its value """
        return self._fitness()

    def _dispatch_fitness(self, fitness_function_type: FitnessType) -> None:
        """
        Sets the type of the fitness function for an Individual instance
        Args:
            fitness_function_type: The fitness function to be used

        Returns: None

        """
        self._fitness = (
            self._fitness_full_match
            if fitness_function_type == FitnessType.FULL_MATCH
            else self._fitness_basic)

    def _fitness_basic(self) -> float:
        """
        Scores the individual granting credit for each sample where the
        pattern produces at least one match, even a partial one
        Returns: Float (fitness value)

        """
        sample_score = 1 / len(self.samples)
        matcher = Matcher(self.samples[0].vocab)
        matcher.add(repr(FitnessType.BASIC), None, self.fenotype)

        contact = 0.0
        for sample in self.samples:
            if matcher(sample):
                contact += sample_score

        return self._wildcard_penalty(contact)

    def _fitness_full_match(self) -> float:
        """
        Scores the individual granting credit only for matches spanning a
        whole sample (the match starts at token 0 and ends at the last token)
        Returns: Float

        """
        sample_score = 1 / len(self.samples)
        matcher = Matcher(self.samples[0].vocab)
        matcher.add(repr(FitnessType.FULL_MATCH), None, self.fenotype)

        contact = 0.0
        for sample in self.samples:
            for _, start, end in matcher(sample):
                if start == 0 and end == len(sample):
                    contact += sample_score

        return self._wildcard_penalty(contact)

    def _wildcard_penalty(self, contact: float) -> float:
        """
        Applies a penalty for the usage of token wildcard if usage of token wildcard is enabled
        Args:
            contact: Temporary fitness value for the current individual

        Returns: Final fitness value for the current individual

        """
        if self.config.use_token_wildcard:
            # Each wildcard ({}) in the fenotype costs 1/len(fenotype)
            penalty = 1 / len(self.fenotype)
            for token in self.fenotype:
                if token == {}:
                    LOG.debug('Applying token wildcard penalty!')
                    contact -= penalty

        return contact
122 |
123 |
class Individual(object):
    """ Individual implementation of an AI Grammatical Evolution algorithm in OOP fashion """
    __slots__ = ('config', 'samples', 'grammar', 'stats', 'bin_genotype', 'int_genotype', 'fenotype', 'fitness_value')

    def __init__(self, samples: [Doc], grammar: dict, stats: Stats, dna: str = None):
        """
        Individual constructor, if dna is not supplied, sets up randomly its binary genotype
        Args:
            samples: list of Spacy doc objects
            grammar: Backus Naur Form grammar notation encoded in a dictionary
            stats (Stats): statistics object related with this run
            dna: Optional, binary string representation
        """
        self.config = Config()

        self.samples = samples
        self.grammar = grammar
        self.stats = stats
        # Genotype-to-fenotype pipeline: binary string -> integer codons ->
        # Spacy Rule Based Matcher pattern -> fitness score
        self.bin_genotype = self._initialize() if dna is None else self.mutate(dna, self.config.mutation_probability)
        self.int_genotype = self._transcription()
        self.fenotype = self._translation()
        self.fitness_value = Fitness(self.config, self.samples, self.fenotype).__call__()

        # Stats concerns
        self._is_solution()

    @property
    def __dict__(self):
        """ Dictionary representation for a slotted class (that has no dict at all) """
        # Above works just for POPOs
        # Only the reporting-relevant fields are exposed
        return {s: getattr(self, s, None) for s in self.__slots__ if s in ('bin_genotype', 'fenotype', 'fitness_value')}

    def __repr__(self):
        """ String representation of a slotted class using hijacked dict """
        return f'{self.__class__.__name__}({self.__dict__})'

    #
    # Problem specific GE methods
    #
    def _initialize(self) -> str:
        """
        Sets up randomly the binary string representation of an individual
        Returns: String, binary fashion

        """
        # One random bit per position, config.dna_length bits in total
        return ''.join([''.join('1') if random() > 0.5
                        else ''.join('0') for _ in range(0, self.config.dna_length)]).strip()

    def _transcription(self) -> [int]:
        """
        Converts a binary string representation to an integer representation codon by codon
        Returns: List of integers

        """
        # NOTE(review): both the slice width and the step use
        # codon_length - 1 bits per codon, which looks off by one with
        # respect to config.codon_length — confirm this is intentional.
        return [int(self.bin_genotype[i:(i+self.config.codon_length-1)], 2)
                for i in range(0, len(self.bin_genotype), self.config.codon_length-1)]

    def _translation(self):
        """
        Derives the fenotype: repeatedly rewrites the grammar's root symbol,
        consuming the integer genotype circularly, until a full pass over the
        grammar keys produces no further change; the resulting text is then
        parsed as a JSON list (a Spacy Rule Based Matcher pattern)
        Returns: List, the individual's Spacy Rule Based Matcher pattern

        """
        done = False
        symbolic_string = self.grammar[S][0]  # Root
        circular = cycle(self.int_genotype)

        while done is not True:
            # First save previous iteration copy
            old_symbolic_string = symbolic_string
            ci = next(circular)

            for key in self.grammar.keys():
                symbolic_string = self._translate(ci, key, symbolic_string)

            # Check if anything changed from last iteration
            if old_symbolic_string == symbolic_string:
                done = True

        # Wrap in brackets so json.loads yields a list of token dicts
        translated_individual = '[' + symbolic_string + ']'

        return json.loads(translated_individual)

    def _translate(self, ci: int, key, symbolic_string: str):
        """
        Helper method to reduce cognitive overload of the public method with the same name (_translation)
        Args:
            ci: Current codon (integer) taken from the circular genotype iterator
            key: Last key in the grammar dict
            symbolic_string: String representation of the individual's Spacy's Rule Based Matcher pattern

        Returns: String representation of the individual's Spacy's Rule Based Matcher pattern

        """
        # Modulo rule selection: the codon picks one production of this key
        fire = divmod(ci, len(self.grammar[key]))[1]

        if key in [T, XPS]:
            fired_rule = self.grammar[key][fire]
            if fired_rule == TOKEN_WILDCARD:
                symbolic_string = re.sub(key, "{}", symbolic_string, 1)
            else:
                symbolic_string = re.sub(key, "{" + str(self.grammar[key][fire]) + "}", symbolic_string, 1)

        # NOTE(review): identity comparison relies on UNDERSCORE being the
        # same interned string object as the grammar key — confirm; ==
        # would be the safer comparison.
        elif key is UNDERSCORE:
            symbolic_string = re.sub(key, "\"_\"" + ": " + "{" + str(self.grammar[key][fire]) + "}", symbolic_string, 1)

        # NOTE(review): T is already consumed by the first branch above, so
        # it is unreachable here — confirm whether it can be dropped.
        elif key in [P, T, F, EF]:
            symbolic_string = re.sub(key, str(self.grammar[key][fire]), symbolic_string, 1)

        elif key in [IN, NOT_IN]:
            # Strip the symbol delimiters and emit the list as JSON
            key_r = key.replace(SLD, '').replace(SRD, '')
            feature = "\"" + key_r + "\"" + ":" + str(self.grammar[key][fire]).replace("\'", "\"").replace("\'", "")
            symbolic_string = re.sub(key, feature, symbolic_string, 1)

        elif key in [GTH, LTH, GEQ, LEQ, EQQ]:
            # Comparison operators map to their extended-pattern-syntax alias
            feature = "\"" + XPS_AS[key] + "\"" + ":" + str(self.grammar[key][fire])
            symbolic_string = re.sub(key, feature, symbolic_string, 1)

        else:
            key_r = key.replace(SLD, '').replace(SRD, '')
            fired_rule = str(self.grammar[key][fire])
            # XPS stays unquoted so a later pass can expand it further
            if fired_rule != XPS:
                feature = "\"" + key_r + "\"" + ":" + "\"" + fired_rule + "\""
            else:
                feature = "\"" + key_r + "\"" + ":" + fired_rule
            symbolic_string = re.sub(key, feature, symbolic_string, 1)

        return symbolic_string

    #
    # Generic GA methods
    #
    @classmethod
    def mutate(cls, dna, mutation_probability) -> str:
        """
        Mutates a given dna string by a mutation probability
        Args:
            dna: binary string representation of a dna sequence
            mutation_probability: Chances of each gen to be mutated

        Returns: Binary string

        """
        mutated_dna = ''

        # Each bit flips independently with the given probability
        for gen in dna:
            if random() < mutation_probability:
                if gen == '1':
                    mutated_dna += '0'
                else:
                    mutated_dna += '1'
            else:
                mutated_dna += gen
        return mutated_dna

    #
    # Stats concerns
    #
    def _is_solution(self) -> None:
        """
        Method to manage AES for the given RUN

        """
        # Keep counting evaluations until the first solution of the run;
        # afterwards the AES counter is frozen
        if self.stats.solution_found is False:
            self.stats.sum_aes(1)
            if self.fitness_value >= self.config.success_threshold:
                LOG.debug('Solution found for this run!')
                self.stats.solution_found = True
287 |
--------------------------------------------------------------------------------
/PatternOmatic/ge/population.py:
--------------------------------------------------------------------------------
1 | """ Evolutionary Population related classes module
2 |
3 | This file is part of PatternOmatic.
4 |
5 | Copyright © 2020 Miguel Revuelta Espinosa
6 |
7 | PatternOmatic is free software: you can redistribute it and/or
8 | modify it under the terms of the GNU Lesser General Public License
9 | as published by the Free Software Foundation, either version 3 of
10 | the License, or (at your option) any later version.
11 |
12 | PatternOmatic is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with PatternOmatic. If not, see .
19 |
20 | """
21 | import random
22 | from typing import List, Tuple, Dict
23 | from spacy.tokens import Doc
24 |
25 | from PatternOmatic.ge.individual import Individual
26 | from PatternOmatic.ge.stats import Stats
27 | from PatternOmatic.settings.config import Config
28 | from PatternOmatic.settings.literals import SelectionType, ReplacementType
29 | from PatternOmatic.settings.log import LOG
30 |
31 |
class Selection(object):
    """ Dispatches the proper selection type for population instances """
    __slots__ = '_select'

    def __init__(self, selection_type: SelectionType):
        """
        Selection constructor: binds the concrete selection operator
        Args:
            selection_type: SelectionType Enum
        """
        self.__dispatch_selection(selection_type)

    def __call__(self, generation: List[Individual]) -> List[Individual]:
        """
        Performs a selection operation for the population
        Args:
            generation: A list of Individual instances

        Returns: A list of Individual instances

        """
        LOG.debug('Selecting individuals...')
        return self._select(generation)

    def __dispatch_selection(self, selection_type: SelectionType) -> None:
        """
        Sets the type of the selection operation for the current evolution
        Args:
            selection_type: SelectionType Enum

        Returns: None

        """
        # Anything other than a valid K_TOURNAMENT request falls back to
        # binary tournament (the default operator)
        if isinstance(selection_type, SelectionType) and selection_type == SelectionType.K_TOURNAMENT:
            self._select = self._k_tournament
        else:
            self._select = self._binary_tournament

    @staticmethod
    def _binary_tournament(generation: List[Individual]) -> List[Individual]:
        """
        Selects members of the current generation into the mating pool in order to produce offspring by comparing pairs
        of Individuals and adding the best of each pair to the "mating pool" until its filled

        Args:
            generation: A list of Individual instances

        Returns: A list of Individual instances (len(generation) + 1 members)

        """
        if len(generation) < 2:
            # With fewer than two individuals no distinct pair can be drawn:
            # the inner re-sampling loop below would never terminate (and an
            # empty generation would crash randint). Degenerate but safe
            # fallback that preserves the pool-size invariant (len + 1).
            return list(generation) * 2

        mating_pool = []

        while len(mating_pool) <= len(generation):
            # Draw two distinct random contenders
            first = random.randint(0, len(generation) - 1)
            second = first
            while second == first:
                second = random.randint(0, len(generation) - 1)

            contender_1 = generation[first]
            contender_2 = generation[second]

            # The fitter contender of the pair joins the mating pool
            if contender_1.fitness_value >= contender_2.fitness_value:
                mating_pool.append(contender_1)
            else:
                mating_pool.append(contender_2)

        return mating_pool

    @staticmethod
    def _k_tournament(generation: List[Individual]) -> List[Individual]:
        """
        Not implemented
        Args:
            generation: A list of Individual instances

        Raises: NotImplementedError
        Returns: A list of Individual instances

        """
        # TODO(me): k tournament
        raise NotImplementedError
112 |
113 |
class Recombination(object):
    """ Dispatches the proper recombination type for population instances """
    __slots__ = ('_recombine', 'config', 'grammar', 'samples', 'stats')

    def __init__(self, grammar: Dict, samples: List[Doc], stats: Stats):
        """
        Recombination constructor: stores the context needed to build children
        and binds the concrete recombination operator
        Args:
            grammar: Backus Naur Form grammar notation encoded in a dictionary
            samples: List of Spacy Doc objects
            stats: Stats object related with this run
        """
        self._recombine = None
        self.config = Config()
        self.grammar = grammar
        self.samples = samples
        self.stats = stats
        self.__dispatch_recombination_type()

    def __call__(self, mating_pool: List[Individual], generation: List[Individual]) -> List[Individual]:
        """
        Performs a recombination operation for the population
        Args:
            mating_pool: A list of Individual instances (selected parents)
            generation: A list of Individual instances (current generation)

        Returns: A list of Individual instances (the offspring)

        """
        LOG.debug(f'Combining individuals...')
        return self._recombine(mating_pool, generation)

    def __dispatch_recombination_type(self) -> None:
        """
        Sets the type of the recombination operation for the current evolution
        (only one operator is currently available)

        Returns: None

        """
        self._recombine = self._random_one_point_crossover

    def _random_one_point_crossover(
            self, mating_pool: List[Individual], generation: List[Individual]) -> List[Individual]:
        """
        For each randomly drawn pair of parents, recombines them to produce two
        offspring and adds both to the offspring list
        Args:
            mating_pool: A list of Individual instances
            generation: A list of Individual instances

        Returns: A list of Individual instances

        """
        offspring = []
        offspring_max_size = round(len(generation) * self.config.offspring_max_size_factor)

        # NOTE(review): when the mating-probability check fails the loop just
        # retries with new parents; with mating_probability == 0 this would
        # never terminate. Also two children are appended per iteration, so
        # the offspring can exceed offspring_max_size by one — confirm.
        while len(offspring) <= offspring_max_size:
            parent_1 = random.choice(mating_pool)
            parent_2 = random.choice(mating_pool)

            if random.random() < self.config.mating_probability:
                # NOTE(review): this only produces cut points at multiples of
                # num_codons_per_individual and caps the factor at
                # codon_length - 1 — confirm against the intended dna layout
                # (dna_length = codon_length * num_codons_per_individual?).
                cut = random.randint(1, self.config.codon_length - 1) * self.config.num_codons_per_individual

                # Create children
                child_1 = Individual(self.samples, self.grammar, self.stats,
                                     dna=parent_1.bin_genotype[:cut] + parent_2.bin_genotype[
                                         -(self.config.dna_length - cut):])

                child_2 = Individual(self.samples, self.grammar, self.stats,
                                     dna=parent_2.bin_genotype[:cut] + parent_1.bin_genotype[
                                         -(self.config.dna_length - cut):])

                offspring.append(child_1)
                offspring.append(child_2)

        return offspring
173 |
174 |
class Replacement(object):
    """ Dispatches the proper replacement type for population instances """
    __slots__ = '_replace'

    def __init__(self, replacement_type: ReplacementType):
        """
        Replacement constructor: binds the concrete replacement operator
        Args:
            replacement_type: ReplacementType Enum
        """
        self.__dispatch_replacement_type(replacement_type)

    def __call__(self, generation: List[Individual], offspring: List[Individual]) \
            -> Tuple[List[Individual], List[Individual]]:
        """
        Performs a replacement operation for the population
        Args:
            generation: A list of Individual instances
            offspring: A list of Individual instances

        Returns: A tuple with the next generation and the emptied offspring

        """
        LOG.debug('Replacing individuals...')
        return self._replace(generation, offspring)

    def __dispatch_replacement_type(self, replacement_type: ReplacementType) -> None:
        """
        Sets the type of the replacement operation for the current evolution
        Args:
            replacement_type: ReplacementType Enum

        Returns: None

        """
        if isinstance(replacement_type, ReplacementType):
            if replacement_type == ReplacementType.MU_LAMBDA_WITH_ELITISM:
                self._replace = self._mu_lambda_elite
            elif replacement_type == ReplacementType.MU_LAMBDA_WITHOUT_ELITISM:
                self._replace = self._mu_lambda_no_elite
            else:
                self._replace = self._mu_plus_lambda
        else:
            # Invalid requests fall back to the default (mu plus lambda)
            self._replace = self._mu_plus_lambda

    @staticmethod
    def _mu_plus_lambda(generation: List[Individual], offspring: List[Individual]) \
            -> Tuple[List[Individual], List[Individual]]:
        """
        Produces the next generation combining the current generation with the offspring
        Args:
            generation: A list of Individual instances
            offspring: A list of Individual instances

        Returns: A tuple containing two list of Individual instances

        """
        # Merge both pools and keep the fittest len(generation) individuals
        replacement_pool = generation + offspring
        replacement_pool.sort(key=lambda i: i.fitness_value, reverse=True)
        generation = replacement_pool[:len(generation)]
        offspring = []

        return generation, offspring

    @staticmethod
    def _mu_lambda_elite(generation: List[Individual], offspring: List[Individual]) \
            -> Tuple[List[Individual], List[Individual]]:
        """
        Produces the next generation using the offspring and the best Individual of the current generation
        Args:
            generation: A list of Individual instances
            offspring: A list of Individual instances

        Returns: A tuple containing two list of Individual instances

        """
        generation.sort(key=lambda i: i.fitness_value, reverse=True)
        offspring.sort(key=lambda i: i.fitness_value, reverse=True)
        # Keep the elite at index 0 and fill the remaining len(generation) - 1
        # slots with the fittest offspring. The previous slice took
        # len(generation) offspring for those len(generation) - 1 slots, which
        # grew the population by one individual on every replacement.
        generation[1:len(generation)] = offspring[0:len(generation) - 1]
        offspring = []

        return generation, offspring

    @staticmethod
    def _mu_lambda_no_elite(generation: List[Individual], offspring: List[Individual]) \
            -> Tuple[List[Individual], List[Individual]]:
        """
        Produces the next generation totally replacing the current generation with the offspring
        Args:
            generation: A list of Individual instances
            offspring: A list of Individual instances

        Returns: A tuple containing two list of Individual instances

        """
        offspring.sort(key=lambda i: i.fitness_value, reverse=True)
        generation = offspring[0:len(generation)]
        offspring = []

        return generation, offspring
261 |
262 |
class Population(object):
    """ Population implementation of an AI Grammatical Evolution algorithm in OOP fashion """
    __slots__ = ('config', 'samples', 'grammar', 'stats', 'generation', 'offspring', 'best_individual',
                 'selection', 'recombination', 'replacement')

    def __init__(self, samples: [Doc], grammar: dict, stats: Stats):
        """
        Population constructor, initializes a list of Individual objects
        Args:
            samples: list of Spacy doc objects
            grammar: Backus Naur Form grammar notation encoded in a dictionary
            stats: Stats object related with this run
        """
        self.config = Config()

        self.samples = samples
        self.grammar = grammar
        self.stats = stats
        self.generation = self._genesis()
        self.offspring = list()
        self.best_individual = None

        # Evolutionary operators, dispatched from the current configuration
        self.selection = Selection(self.config.selection_type)
        self.recombination = Recombination(grammar, samples, stats)
        self.replacement = Replacement(self.config.replacement_type)

    #
    # Population specific methods
    #
    def _genesis(self) -> List[Individual]:
        """
        Initializes the first generation
        Returns: A list of individual objects

        """
        # NOTE(review): the population size is taken from config.dna_length
        # (the genotype length); presumably a dedicated population-size
        # setting was intended — confirm.
        return [Individual(self.samples, self.grammar, self.stats) for _ in range(0, self.config.dna_length)]

    def _best_challenge(self) -> None:
        """
        Compares current generation best fitness individual against previous generation best fitness individual.
        Updates the best individual attribute accordingly
        """
        # generation[0] is the current best: replacement operators leave the
        # generation sorted by fitness in descending order
        if self.best_individual is not None:
            if self.generation[0].fitness_value > self.best_individual.fitness_value:
                self.best_individual = self.generation[0]
        else:
            self.best_individual = self.generation[0]

    #
    # Evolution
    #
    def evolve(self):
        """
        Search Engine:
        1) Selects individuals of the current generation to constitute who will mate
        2) Crossover or recombination of the previously selected individuals
        3) Replace/mix the this generation with the offspring
        4) Save the best individual by fitness
        5) Calculate statistics for this Run
        """

        LOG.info('Evolution taking place, please wait...')

        self.stats.reset()

        for _ in range(self.config.max_generations):
            mating_pool = self.selection(self.generation)
            self.offspring = self.recombination(mating_pool, self.generation)
            self.generation, self.offspring = self.replacement(self.generation, self.offspring)
            self._best_challenge()

        LOG.info(f'Best candidate found on this run: {self.best_individual}')

        # Stats concerns
        self.stats.add_most_fitted(self.best_individual)
        self.stats.add_mbf(self.best_individual.fitness_value)

        # NOTE(review): strict > here, while Individual._is_solution uses >=
        # against the same threshold — confirm the intended boundary.
        if self.best_individual.fitness_value > self.config.success_threshold:
            self.stats.add_sr(True)
        else:
            self.stats.add_sr(False)
343 |
--------------------------------------------------------------------------------
/PatternOmatic/ge/stats.py:
--------------------------------------------------------------------------------
1 | """ Grammatical Evolution performance metrics module
2 |
3 | This file is part of PatternOmatic.
4 |
5 | Copyright © 2020 Miguel Revuelta Espinosa
6 |
7 | PatternOmatic is free software: you can redistribute it and/or
8 | modify it under the terms of the GNU Lesser General Public License
9 | as published by the Free Software Foundation, either version 3 of
10 | the License, or (at your option) any later version.
11 |
12 | PatternOmatic is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with PatternOmatic. If not, see .
19 |
20 | """
21 | import operator
22 | from time import time
23 |
24 | from PatternOmatic.settings.literals import ReportFormat
25 | from PatternOmatic.settings.config import Config
26 |
27 |
28 | class Stats(object):
29 | """ Class responsible of handling performance metrics """
30 | __slots__ = [
31 | 'config',
32 | 'success_rate_accumulator',
33 | 'mbf_accumulator',
34 | 'aes_accumulator',
35 | 'time_accumulator',
36 | 'most_fitted_accumulator',
37 | 'solution_found',
38 | 'success_rate',
39 | 'mbf',
40 | 'aes',
41 | 'mean_time',
42 | 'aes_counter'
43 | ]
44 |
    def __init__(self):
        """ Stats instances constructor """
        self.config = Config()
        # Per-run metric accumulators: one entry appended per RUN
        self.success_rate_accumulator = list()
        self.mbf_accumulator = list()
        self.aes_accumulator = list()
        self.time_accumulator = list()
        self.most_fitted_accumulator = list()
        # Raised by Individual when a run reaches the success threshold
        self.solution_found = False
        # Aggregated metrics, filled in by calculate_metrics()
        self.success_rate = None
        self.mbf = None
        self.aes = None
        self.mean_time = None

        # Evaluations-to-Solution counter for the current run (see sum_aes / reset)
        self.aes_counter = 0
60 |
    @property
    def __dict__(self):
        """ Dictionary representation for a slotted class (that has no dict at all) """
        # Above works just for POPOs
        # Expose only the aggregated metrics plus the overall best individual
        stats_dict = \
            {s: getattr(self, s, None) for s in self.__slots__ if s in ('success_rate', 'mbf', 'aes', 'mean_time')}

        # get_most_fitted() is defined elsewhere in this class (not visible in
        # this chunk); presumably it returns the best Individual found so far,
        # or None when nothing was accumulated — TODO confirm.
        most_fitted = self.get_most_fitted()
        most_fitted_dict = {'most_fitted': most_fitted.__dict__} if most_fitted is not None else {'most_fitted': None}
        stats_dict.update(most_fitted_dict)

        return stats_dict
73 |
    def __repr__(self):
        """ String representation of a slotted class using hijacked dict """
        # Renders only the curated fields exposed by the __dict__ property
        return f'{self.__class__.__name__}({self.__dict__})'
77 |
    def __iter__(self):
        """ Enable dict(self) """
        # Yields (name, value) pairs from the curated __dict__ property
        yield from self.__dict__.items()
81 |
82 | #
83 | # Accumulators & Counters
84 | #
85 | def add_sr(self, sr: bool) -> None:
86 | """
87 | Adds a new Success Rate value to the accumulator
88 | Args:
89 | sr: Boolean value that indicates if the RUN succeeded (True) or not (False)
90 |
91 | """
92 | self.success_rate_accumulator.append(sr)
93 |
94 | def add_mbf(self, bf: float) -> None:
95 | """
96 | Adds a new Best Fitness value to the accumulator
97 | Args:
98 | bf: Best fitness fount over a RUN
99 |
100 | """
101 | self.mbf_accumulator.append(bf)
102 |
103 | def add_aes(self, es: int) -> None:
104 | """
105 | Adds a new Evaluations to Solution value to the accumulator
106 | Args:
107 | es: Number of evaluations to solution over a RUN
108 |
109 | """
110 | self.aes_accumulator.append(es)
111 |
112 | def add_time(self, time_interval: float) -> None:
113 | """
114 | Adds a new Time lapsed value to the accumulator
115 | Args:
116 | time_interval: Time lapsed of a RUN
117 |
118 | """
119 | self.time_accumulator.append(time_interval)
120 |
121 | def add_most_fitted(self, individual: any) -> None:
122 | """
123 | Adds a new individual to the accumulator
124 | Args:
125 | individual: Individual with best fitness found over a RUN
126 |
127 | Returns:
128 |
129 | """
130 | self.most_fitted_accumulator.append(individual)
131 |
132 | def sum_aes(self, es: int) -> None:
133 | """
134 | Sums a new Evaluations to Solution value to the counter
135 | Args:
136 | es: Number of evaluations to Solution of a given Run
137 |
138 | Returns:
139 |
140 | """
141 | self.aes_counter += es
142 |
143 | #
144 | # Metrics
145 | #
146 | def reset(self):
147 | """ Resets variables that depend on the run """
148 | self.aes_counter = 0
149 | self.solution_found = False
150 |
151 | def calculate_metrics(self):
152 | """ Calculates the common GE evaluation metrics """
153 | self.add_aes(self.aes_counter)
154 | self.success_rate = Stats.avg(self.success_rate_accumulator)
155 | self.mbf = Stats.avg(self.mbf_accumulator)
156 | self.aes = Stats.avg(self.aes_accumulator)
157 | self.mean_time = Stats.avg(self.time_accumulator)
158 |
159 | #
160 | # Auxiliary methods
161 | #
162 | def get_most_fitted(self):
163 | """
164 | Best individual found
165 | Returns: Individual with Best Fitness found for this Execution
166 |
167 | """
168 | return max(self.most_fitted_accumulator, key=operator.attrgetter('fitness_value')) \
169 | if len(self.most_fitted_accumulator) > 0 else None
170 |
171 | @staticmethod
172 | def avg(al: list) -> float:
173 | """
174 | Returns the mean of a list if the list is not empty
175 | Args:
176 | al: List instance
177 |
178 | Returns: float, the mean/average of the list
179 |
180 | """
181 | return sum(al) / len(al) if len(al) > 0 else 0.0
182 |
183 | def persist(self) -> None:
184 | """
185 | Makes or append execution result to file. If no valid format is specified CSV will be used as default
186 | Returns: None
187 |
188 | """
189 | if self.config.report_format == ReportFormat.JSON:
190 | with open(self.config.report_path, mode='a+') as f:
191 | f.writelines(f'{dict(self)}' + '\n')
192 | else:
193 | with open(self.config.report_path, mode='a+') as f:
194 | f.writelines(self._to_csv() + '\n')
195 |
196 | def _to_csv(self):
197 | """
198 | Generates Comma Separated Value (csv) representation of a Stats instance object
199 | Returns: String, csv instance representation
200 |
201 | """
202 | csv = f'{time()}' + '\t'
203 |
204 | for k, v in self.__dict__.items():
205 | if not type(v) is dict:
206 | csv = csv + str(v) + '\t'
207 | else:
208 | for _, vi in v.items():
209 | csv = csv + str(vi) + '\t'
210 | return csv
211 |
--------------------------------------------------------------------------------
/PatternOmatic/nlp/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/revuel/PatternOmatic/8f95c1c134a14419a11b8cb192144857b40d0b3c/PatternOmatic/nlp/__init__.py
--------------------------------------------------------------------------------
/PatternOmatic/nlp/bnf.py:
--------------------------------------------------------------------------------
1 | """ Backus Naur Form Grammar Generator module
2 |
3 | This file is part of PatternOmatic.
4 |
5 | Copyright © 2020 Miguel Revuelta Espinosa
6 |
7 | PatternOmatic is free software: you can redistribute it and/or
8 | modify it under the terms of the GNU Lesser General Public License
9 | as published by the Free Software Foundation, either version 3 of
10 | the License, or (at your option) any later version.
11 |
12 | PatternOmatic is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with PatternOmatic. If not, see .
19 |
20 | """
21 | from inspect import getmembers
22 | from spacy.tokens import Doc, Token
23 | from PatternOmatic.settings.config import Config
24 | from PatternOmatic.settings.literals import S, P, T, F, OP, NEGATION, ZERO_OR_ONE, ZERO_OR_MORE, ONE_OR_MORE, LENGTH, \
25 | XPS, IN, NOT_IN, EQQ, GEQ, LEQ, GTH, LTH, TOKEN_WILDCARD, UNDERSCORE, EF, ORTH, TEXT, LOWER, POS, TAG, DEP, LEMMA, \
26 | SHAPE, ENT_TYPE, IS_ALPHA, IS_ASCII, IS_DIGIT, IS_BRACKET, IS_LOWER, IS_PUNCT, IS_QUOTE, IS_SPACE, IS_TITLE, \
27 | IS_OOV, IS_UPPER, IS_STOP, IS_CURRENCY, IS_LEFT_PUNCT, IS_RIGHT_PUNCT, LIKE_NUM, LIKE_EMAIL, \
28 | LANG, NORM, PREFIX, SENTIMENT, STRING, SUFFIX, TEXT_WITH_WS, WHITESPACE, LIKE_URL, MATCHER_SUPPORTED_ATTRIBUTES, \
29 | ENT_ID, ENT_IOB, ENT_KB_ID, HAS_VECTOR
30 | from PatternOmatic.settings.log import LOG
31 |
32 |
33 | #
34 | # Dynamic Grammar (Backus Naur Form) Generator
35 | #
def dynamic_generator(samples: [Doc]) -> dict:
    """
    Builds a grammar in Backus Naur Form (BNF) notation out of the Spacy NLP
    Linguistic Feature values observed in the given sample list of Doc instances
    Args:
        samples: List of Spacy Doc objects

    Returns: Backus Naur Form grammar notation encoded in a dictionary

    """
    cfg = Config()

    LOG.info(f'Generating BNF based on the following samples: {str(samples)}')

    # Root production
    grammar = {S: [P]}

    # Inspect the samples: token count interval plus observed (extended) features
    max_tokens, min_tokens, token_features, extended_features = _features_seen(samples)

    # Pattern production: from the shortest to the longest sample, in tokens
    grammar[P] = _symbol_stacker(T, max_tokens, min_tokens)

    # Token production: up to the configured number of features per token
    grammar[T] = _symbol_stacker(F, _get_features_per_token(token_features))

    if cfg.use_token_wildcard is True:
        grammar[T].append(TOKEN_WILDCARD)

    feature_keys = list(token_features.keys())

    # Grammar operators and extended pattern syntax are mutually exclusive
    if cfg.use_grammar_operators is True and cfg.use_extended_pattern_syntax is False:
        grammar = _add_grammar_operators(grammar, feature_keys)
    elif cfg.use_extended_pattern_syntax is True and cfg.use_grammar_operators is False:
        grammar = _add_extended_pattern_syntax(grammar, feature_keys, token_features)
    else:
        grammar[F] = feature_keys

    # Terminal productions: one per feature key, holding every observed value
    for feature_key, feature_values in token_features.items():
        if cfg.use_extended_pattern_syntax is True:
            feature_values.append(XPS)
        grammar[feature_key] = feature_values

    if cfg.use_custom_attributes is True:
        grammar = _add_custom_attributes(grammar, extended_features)

    LOG.info(f'Dynamically generated BNF: {str(grammar)}')

    return grammar
86 |
87 |
88 | #
89 | # BNF Utilities
90 | #
def _features_seen(samples: [Doc]) -> (int, int, dict, dict):
    """
    Builds up a dictionary containing Spacy Linguistic Feature Keys and their respective seen values for the sample
    Args:
        samples: List of Spacy Doc objects

    Returns: A 4-tuple with the max number of tokens in a sample, the min number of tokens in a
        sample, the dict of observed features and the dict of observed extended (custom) features

    """
    config = Config()

    # For boolean features both options are always offered
    bool_list = [True, False]

    # Set token extensions (the first token serves as template for registration)
    if config.use_custom_attributes is True:
        _set_token_extension_attributes(samples[0][0])
        extended_features = _extended_features_seen([token for sample in samples for token in sample])
    else:
        extended_features = {UNDERSCORE: {}}

    # Observed values per feature
    orth_list = []
    text_list = []
    lower_list = []
    length_list = []
    shape_list = []
    pos_list = []
    tag_list = []
    dep_list = []
    lemma_list = []
    ent_type_list = []

    for sample in samples:
        for token in sample:
            orth_list.append(token.orth_)
            text_list.append(token.text)
            lower_list.append(token.lower_)
            length_list.append(len(token))
            pos_list.append(token.pos_)
            tag_list.append(token.tag_)
            dep_list.append(token.dep_)
            lemma_list.append(token.lemma_)
            shape_list.append(token.shape_)
            ent_type_list.append(token.ent_type_)

    # Token count interval across samples; an empty sample list keeps the historical
    # sentinel values (0 for max, a huge int for min) the callers already expect
    sample_lengths = [len(sample) for sample in samples]
    max_doc_length = max(sample_lengths, default=0)
    min_doc_length = min(sample_lengths, default=999999999)

    feature_lists = {ORTH: orth_list,
                     TEXT: text_list,
                     LOWER: lower_list,
                     LENGTH: length_list,
                     POS: pos_list,
                     TAG: tag_list,
                     DEP: dep_list,
                     LEMMA: lemma_list,
                     SHAPE: shape_list,
                     ENT_TYPE: ent_type_list}

    if config.use_uniques is True:
        # Deduplicate observations, sorted for deterministic grammars
        features = {k: sorted(list(set(v))) for k, v in feature_lists.items()}
    else:
        features = feature_lists

    # Add boolean features
    if config.use_boolean_features is True:
        features.update({
            IS_ALPHA: bool_list,
            IS_ASCII: bool_list,
            IS_DIGIT: bool_list,
            IS_LOWER: bool_list,
            IS_UPPER: bool_list,
            IS_TITLE: bool_list,
            IS_PUNCT: bool_list,
            IS_SPACE: bool_list,
            IS_STOP: bool_list,
            LIKE_NUM: bool_list,
            LIKE_URL: bool_list,
            LIKE_EMAIL: bool_list
        })

    # Drop all features whose only observation is the empty string
    features = _feature_pruner(features)
    extended_features[UNDERSCORE] = _feature_pruner(extended_features[UNDERSCORE])

    return max_doc_length, min_doc_length, features, extended_features
197 |
198 |
def _set_token_extension_attributes(token: Token) -> None:
    """
    Given a Spacy Token instance, register all the Spacy token attributes not accepted by the Spacy Matcher
    as custom attributes inside the Token Extensions (token._. space)
    Args:
        token: Spacy Token instance used as template for the registration

    Returns: None

    """
    # Retrieve cleaned up Token Attributes
    token_attributes = _clean_token_attributes(
        {k: v for k, v in getmembers(token) if type(v) in (str, bool, float)})

    # Register one custom attribute per remaining token attribute. The getter
    # early-binds token and attribute name through default arguments, so no
    # intermediate list of lambdas is needed
    for k in token_attributes:
        token.set_extension(str('custom_' + k).upper(), getter=lambda token_=token, k_=k: getattr(token_, k_))
217 |
218 |
def _clean_token_attributes(token_attributes: dict) -> dict:
    """
    Drops, in place, the '__doc__' entry plus every key the Spacy Matcher already
    supports natively from the given token attributes dict
    Args:
        token_attributes: dict of token features

    Returns: Token attributes dict without Spacy Matcher's supported attribute keys

    """
    unwanted_keys = ('__doc__', *MATCHER_SUPPORTED_ATTRIBUTES)
    for unwanted in unwanted_keys:
        token_attributes.pop(unwanted)

    return token_attributes
233 |
234 |
def _extended_features_seen(tokens: [Token]) -> dict:
    """
    Builds up a dictionary containing Spacy Linguistic Feature Keys and their respective seen values for the
    input token list extended attributes (those attributes not accepted by the Spacy Matcher by default,
    included as token extensions)
    Args:
        tokens: List of Spacy Token instances

    Returns: dict of features

    """
    bool_list = [True, False]

    def distinct(attribute_name):
        # Unique, sorted observations of one custom token extension attribute
        return sorted(list(set([getattr(getattr(token, '_'), attribute_name) for token in tokens])))

    extended_features = \
        {
            UNDERSCORE: {
                ENT_ID: distinct('CUSTOM_ENT_ID_'),
                ENT_IOB: distinct('CUSTOM_ENT_IOB_'),
                ENT_KB_ID: distinct('CUSTOM_ENT_KB_ID_'),
                HAS_VECTOR: bool_list,
                IS_BRACKET: bool_list,
                IS_CURRENCY: bool_list,
                IS_LEFT_PUNCT: bool_list,
                IS_OOV: bool_list,
                IS_QUOTE: bool_list,
                IS_RIGHT_PUNCT: bool_list,
                LANG: distinct('CUSTOM_LANG_'),
                NORM: distinct('CUSTOM_NORM_'),
                PREFIX: distinct('CUSTOM_PREFIX_'),
                SENTIMENT: distinct('CUSTOM_SENTIMENT'),
                STRING: distinct('CUSTOM_STRING'),
                SUFFIX: distinct('CUSTOM_SUFFIX_'),
                TEXT_WITH_WS: distinct('CUSTOM_TEXT_WITH_WS'),
                WHITESPACE: distinct('CUSTOM_WHITESPACE_')
            }
        }

    return extended_features
278 |
279 |
280 | def _feature_pruner(features: dict) -> dict:
281 | """
282 | Prunes dict keys whose values contain a list of repeated items
283 | Args:
284 | features: dict
285 |
286 | Returns: pruned dict
287 |
288 | """
289 | # Drop all observations equal to empty string
290 | to_del_list = list()
291 | for k in features.keys():
292 | if len(features[k]) == 1 and features[k][0] == '':
293 | to_del_list.append(k)
294 |
295 | for k_item in to_del_list:
296 | features.pop(k_item)
297 |
298 | return features
299 |
300 |
301 | def _symbol_stacker(symbol: str, max_length: int, min_length: int = 1) -> list:
302 | """
303 | Given a symbol creates a list of length max_length where each item is symbol concat previous list item
304 | Args:
305 | symbol: string
306 | max_length: list max length
307 |
308 | Returns: list of symbol
309 |
310 | """
311 | symbol_times_list = list()
312 | last = ''
313 |
314 | for _ in range(max_length):
315 | if last == '':
316 | last = symbol
317 | else:
318 | last = last + "," + symbol
319 |
320 | symbol_times_list.append(last)
321 |
322 | if 1 < min_length <= max_length:
323 | symbol_times_list = symbol_times_list[min_length-1:]
324 |
325 | return symbol_times_list
326 |
327 |
def _get_features_per_token(features_dict: dict) -> int:
    """
    Given the configuration set up, determine the maximum number of features per token at grammar
    Args:
        features_dict: dictionary of features keys with all possible feature value options

    Returns: integer

    """
    configured = Config().features_per_token
    available = len(features_dict.keys())

    # A non-positive setting means "use every available feature"
    if configured <= 0:
        return available

    # Never promise more features than the grammar actually offers
    return min(available, configured)
348 |
349 |
def _add_grammar_operators(pattern_grammar: dict, list_of_features: list) -> dict:
    """
    Adds support to Spacy's grammar operators usage
    Args:
        pattern_grammar: BNF dict
        list_of_features: list of token features

    Returns: Backus Naur Form grammar notation encoded in a dictionary with Spacy's grammar operators

    """
    # Every feature is offered both bare and paired with an operator slot
    features_with_op = []
    for feature in list_of_features:
        features_with_op.extend((feature, feature + ',' + OP))

    pattern_grammar[F] = features_with_op
    pattern_grammar[OP] = [NEGATION, ZERO_OR_ONE, ONE_OR_MORE, ZERO_OR_MORE]
    return pattern_grammar
367 |
368 |
def _add_extended_pattern_syntax(pattern_grammar: dict, list_of_features: list, features_dict: dict) -> dict:
    """
    Adds support to the extended pattern syntax at BNF dicts
    Args:
        pattern_grammar: BNF dict
        list_of_features: list of token features
        features_dict: dict of token features

    Returns:
        dict: Backus Naur Form grammar notation encoded in a dictionary with Spacy's extended pattern syntax
    """
    length_options = features_dict[LENGTH].copy()
    terminal_stack = _all_feature_terminal_list(features_dict)

    pattern_grammar[F] = list_of_features
    pattern_grammar[XPS] = [IN, NOT_IN, EQQ, GEQ, LEQ, GTH, LTH]

    # Set operators draw from the full terminal stack
    for set_operator in (IN, NOT_IN):
        pattern_grammar[set_operator] = terminal_stack

    # Rich comparison operators only make sense against token lengths
    for comparison_operator in (EQQ, GEQ, LEQ, GTH, LTH):
        pattern_grammar[comparison_operator] = length_options

    return pattern_grammar
393 |
394 |
395 | def _all_feature_terminal_list(features_dict: dict) -> list:
396 | """
397 | Stacks all feature terminal options in a list of lists to be used for the extended pattern syntax set operators
398 | Args:
399 | features_dict: dictionary of feature keys with all possible feature value options
400 |
401 | Returns:
402 |
403 | """
404 | all_terminal_list = list()
405 |
406 | for item in list(features_dict.items()):
407 | current_terminal_holder = list()
408 |
409 | for terminal_list_item in item[1]:
410 | if len(current_terminal_holder) > 0:
411 | temp_list = list(current_terminal_holder[-1])
412 | temp_list.append(terminal_list_item)
413 | current_terminal_holder.append(temp_list)
414 | else:
415 | current_terminal_holder.append([terminal_list_item])
416 |
417 | all_terminal_list += current_terminal_holder
418 |
419 | all_terminal_list = [ele for ind, ele in enumerate(all_terminal_list) if ele not in all_terminal_list[:ind]]
420 | return all_terminal_list
421 |
422 |
def _add_custom_attributes(pattern_grammar: dict, extended_features: dict) -> dict:
    """
    Adds support to a specific set of custom attributes at BNF dict
    Args:
        pattern_grammar: BNF dict
        extended_features: dict of token features not supported by default by the Spacy's Matcher

    Returns: Backus Naur Form grammar notation encoded in a dictionary with Spacy's custom attributes

    """
    underscore_features = extended_features[UNDERSCORE]

    # The underscore production stacks extended features just like <T> stacks <F>
    pattern_grammar[UNDERSCORE] = _symbol_stacker(EF, _get_features_per_token(underscore_features))
    pattern_grammar[EF] = list(underscore_features.keys())
    pattern_grammar.update(underscore_features.items())

    # A token may now hold custom attributes, alone or next to regular features
    pattern_grammar[T].append(UNDERSCORE)
    pattern_grammar[T].append(F + ',' + UNDERSCORE)

    return pattern_grammar
439 |
--------------------------------------------------------------------------------
/PatternOmatic/settings/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/revuel/PatternOmatic/8f95c1c134a14419a11b8cb192144857b40d0b3c/PatternOmatic/settings/__init__.py
--------------------------------------------------------------------------------
/PatternOmatic/settings/config.py:
--------------------------------------------------------------------------------
1 | """ Configuration Management module
2 |
3 | This file is part of PatternOmatic.
4 |
5 | Copyright © 2020 Miguel Revuelta Espinosa
6 |
7 | PatternOmatic is free software: you can redistribute it and/or
8 | modify it under the terms of the GNU Lesser General Public License
9 | as published by the Free Software Foundation, either version 3 of
10 | the License, or (at your option) any later version.
11 |
12 | PatternOmatic is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with PatternOmatic. If not, see .
19 |
20 | """
21 | from __future__ import annotations
22 | import configparser
23 | from typing import Optional
24 | from PatternOmatic.settings.log import LOG
25 | from PatternOmatic.settings.literals import GE, MAX_RUNS, SUCCESS_THRESHOLD, POPULATION_SIZE, MAX_GENERATIONS, \
26 | CODON_LENGTH, CODONS_X_INDIVIDUAL, MUTATION_PROBABILITY, OFFSPRING_FACTOR, MATING_PROBABILITY, K_VALUE, \
27 | SELECTION_TYPE, REPLACEMENT_TYPE, RECOMBINATION_TYPE, RecombinationType, ReplacementType, SelectionType, \
28 | FitnessType, FITNESS_FUNCTION_TYPE, \
29 | DGG, FEATURES_X_TOKEN, USE_BOOLEAN_FEATURES, USE_CUSTOM_ATTRIBUTES, USE_UNIQUES, \
30 | USE_GRAMMAR_OPERATORS, USE_TOKEN_WILDCARD, USE_EXTENDED_PATTERN_SYNTAX, REPORT_PATH, IO, ReportFormat, REPORT_FORMAT
31 |
32 |
class SingletonMetaNaive(type):
    """ The Naive Singleton Design Pattern of type Metaclass builder """

    # Optional[X] already means "X or None"; the former Optional[Config, None]
    # spelling is invalid typing syntax (only tolerated because annotations are lazy here)
    _instance: Optional[Config] = None

    def __call__(cls, config_file_path: str = None) -> Config:
        """
        Returns the single Config instance, creating it on first use
        Args:
            config_file_path: Path for a configuration file (only honoured on the first call)

        Returns: the singleton Config instance

        """
        if cls._instance is None:
            LOG.debug('Creating config object!')
            cls._instance = super().__call__(config_file_path)
        return cls._instance

    def clear_instance(cls):
        """ For testing purposes, destroy Singleton instance """
        LOG.debug('Removing config object!')
        cls._instance = None
        del cls._instance
49 |
50 |
class Config(metaclass=SingletonMetaNaive):
    """ Singleton Configuration package's Class"""
    # Slotted: no instance __dict__, so only the attributes listed here can exist
    __slots__ = (
        'max_runs',
        'success_threshold',
        'population_size',
        'max_generations',
        'codon_length',
        'num_codons_per_individual',
        'dna_length',
        'mutation_probability',
        'offspring_max_size_factor',
        'mating_probability',
        'k_value',
        'selection_type',
        'recombination_type',
        'replacement_type',
        'fitness_function_type',
        'features_per_token',
        'use_boolean_features',
        'use_custom_attributes',
        'use_uniques',
        'use_grammar_operators',
        'use_token_wildcard',
        'use_extended_pattern_syntax',
        'report_path',
        'report_format',
        'file_path'
    )

    def __init__(self, config_file_path: str = None):
        """
        Config object constructor

        Parses the given INI-style file (via configparser) and falls back to a
        built-in default for every missing or invalid option.
        Args:
            config_file_path: Path for a configuration file
        """
        config_parser = configparser.ConfigParser()

        if config_file_path is None:
            LOG.warning(f'Configuration file not provided. Falling back to default values')
            self.file_path = None
        else:
            # configparser.read returns the list of files it successfully parsed
            file_list = config_parser.read(config_file_path)
            if len(file_list) == 0:
                LOG.warning(f'File {config_file_path} not found. Falling back to default values')
                self.file_path = None
            else:
                self.file_path = config_file_path

        #
        # GE configuration parameters
        #
        self.max_runs = self._validate_config_argument(GE, MAX_RUNS, 4, config_parser)
        self.success_threshold = self._validate_config_argument(GE, SUCCESS_THRESHOLD, 0.8, config_parser)
        self.population_size = self._validate_config_argument(GE, POPULATION_SIZE, 10, config_parser)
        self.max_generations = self._validate_config_argument(GE, MAX_GENERATIONS, 3, config_parser)
        self.codon_length = self._validate_config_argument(GE, CODON_LENGTH, 8, config_parser)
        self.num_codons_per_individual = self._validate_config_argument(GE, CODONS_X_INDIVIDUAL, 4, config_parser)
        # Derived value, not read from the file
        self.dna_length = self.codon_length * self.num_codons_per_individual
        self.mutation_probability = self._validate_config_argument(GE, MUTATION_PROBABILITY, 0.5, config_parser)
        self.offspring_max_size_factor = self._validate_config_argument(GE, OFFSPRING_FACTOR, 3.5, config_parser)
        self.mating_probability = self._validate_config_argument(GE, MATING_PROBABILITY, 0.9, config_parser)
        self.k_value = self._validate_config_argument(GE, K_VALUE, 3, config_parser)

        #
        # GE configuration methods (integer codes mapped onto their enum types)
        #
        self.selection_type = SelectionType(
            self._validate_config_argument(GE, SELECTION_TYPE, 0, config_parser))

        self.recombination_type = RecombinationType(
            self._validate_config_argument(GE, RECOMBINATION_TYPE, 0, config_parser))

        self.replacement_type = ReplacementType(
            self._validate_config_argument(GE, REPLACEMENT_TYPE, 0, config_parser))

        self.fitness_function_type = FitnessType(
            self._validate_config_argument(GE, FITNESS_FUNCTION_TYPE, 1, config_parser))

        #
        # BNF Grammar Generation configuration options
        #
        self.features_per_token = self._validate_config_argument(DGG, FEATURES_X_TOKEN, 1, config_parser)
        self.use_boolean_features = self._validate_config_argument(DGG, USE_BOOLEAN_FEATURES, False, config_parser)
        self.use_custom_attributes = self._validate_config_argument(DGG, USE_CUSTOM_ATTRIBUTES, False, config_parser)
        self.use_uniques = self._validate_config_argument(DGG, USE_UNIQUES, True, config_parser)
        self.use_grammar_operators = self._validate_config_argument(DGG, USE_GRAMMAR_OPERATORS, False, config_parser)
        self.use_token_wildcard = self._validate_config_argument(DGG, USE_TOKEN_WILDCARD, False, config_parser)
        self.use_extended_pattern_syntax = \
            self._validate_config_argument(DGG, USE_EXTENDED_PATTERN_SYNTAX, False, config_parser)

        #
        # Configuration validation
        # NOTE: runs before the IO options below; it only needs the two flags set above
        #
        self._check_xps_op_restriction()

        #
        # IO
        #
        self.report_path = \
            self._validate_config_argument(IO, REPORT_PATH, '/tmp/patternomatic_report.txt', config_parser)

        self.report_format = ReportFormat(self._validate_config_argument(IO, REPORT_FORMAT, 0, config_parser))

        LOG.info(f'Configuration instance: {self}')

    def __setattr__(self, key, value) -> None:
        """
        Overrides method to be used with slots

        First-time assignments (made during __init__) are stored directly; later
        updates are only accepted when the new value preserves the property's type.
        Args:
            key: An object slotted property
            value: An intended value for the object key

        Returns: None

        """
        if hasattr(self, key):
            if self._preserve_property_type(getattr(self, key), value):
                super(Config, self).__setattr__(key, value)
                LOG.info(f'Updating configuration parameter {key.upper()} with value {value}')
                # Re-check the XPS/operators incompatibility whenever either flag changes
                if key == USE_EXTENDED_PATTERN_SYNTAX.lower() or key == USE_GRAMMAR_OPERATORS.lower():
                    self._check_xps_op_restriction()
            else:
                LOG.warning(f'Invalid data type {type(value)} for property {key}. Skipping update')
        else:
            # Attribute not set yet: initial assignment, no validation applies
            super(Config, self).__setattr__(key, value)

    @property
    def __dict__(self):
        """ Hijacks dictionary for this config slotted class """
        return {s: getattr(self, s, None) for s in self.__slots__}

    def __repr__(self):
        """ Representation of config instance """
        return f'{self.__class__.__name__}({self.__dict__})'

    #
    # Utilities
    #
    @staticmethod
    def _validate_config_argument(section, option, default, config_parser):
        """
        Reads one option from the parsed configuration, coercing it to the type of
        the supplied default and falling back to that default on any parsing error
        Args:
            section: Configuration section name the option lives in
            option: Option (key) name to read
            default: Fallback value; its type selects the configparser getter used
            config_parser: configparser.ConfigParser with the file already read

        Returns: The configured value coerced to type(default), or default

        """
        try:
            if isinstance(default, bool):
                value = config_parser.getboolean(section, option, fallback=default)
            elif isinstance(default, int):
                value = config_parser.getint(section, option, fallback=default)
            elif isinstance(default, float):
                value = config_parser.getfloat(section, option, fallback=default)
            elif isinstance(default, str):
                value = config_parser.get(section, option, fallback=default)
            else:
                value = default
        except ValueError:
            LOG.warning(f'[{section}][{option}] configuration parameter wrongly set. '
                        f'Falling back to its default value: {default}')
            value = default

        LOG.debug(f'[{section}][{option}] {value}')
        return value

    @staticmethod
    def _preserve_property_type(_property, value):
        # True when value's type matches the current property's type
        # NOTE(review): bools would pass an int property's check (isinstance(True, int)) — confirm acceptable
        return isinstance(value, type(_property))

    #
    # Problem specific restrictions
    #
    def _check_xps_op_restriction(self) -> None:
        """
        Spacy's Grammar Operators and Quantifiers and the Spacy's Extended Pattern Syntax can not be used together at
        the same time in a pattern for the Spacy's Rule Based Matcher.

        This method checks the provided configuration and disables the Spacy's Extended Pattern Syntax if both
        mechanisms are found enabled at the provided configuration.

        Returns: None

        """
        # hasattr guards calls made while __init__ is still assigning attributes
        if hasattr(self, USE_EXTENDED_PATTERN_SYNTAX.lower()) and hasattr(self, USE_GRAMMAR_OPERATORS.lower()) and \
                self.use_extended_pattern_syntax is True and self.use_grammar_operators is True:
            LOG.warning(f'Extended Pattern Syntax is not compatible with the usage of Grammar Operators. '
                        f'Extended Pattern Syntax has been disabled!')
            self.use_extended_pattern_syntax = False
245 |
--------------------------------------------------------------------------------
/PatternOmatic/settings/literals.py:
--------------------------------------------------------------------------------
1 | """ Literals/constants module
2 |
3 | This file is part of PatternOmatic.
4 |
5 | Copyright © 2020 Miguel Revuelta Espinosa
6 |
7 | PatternOmatic is free software: you can redistribute it and/or
8 | modify it under the terms of the GNU Lesser General Public License
9 | as published by the Free Software Foundation, either version 3 of
10 | the License, or (at your option) any later version.
11 |
12 | PatternOmatic is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with PatternOmatic. If not, see .
19 |
20 | """
21 | from enum import Enum, unique
22 |
23 |
24 | #
25 | # GE Related literals
26 | #
@unique
class SelectionType(Enum):
    """ Evolutionary selection types """
    # Integer values so they can be built straight from config file ints (see Config)
    BINARY_TOURNAMENT = 0
    K_TOURNAMENT = 1

    def __repr__(self):
        """ Human readable: just the member name """
        return self.name
36 |
37 |
@unique
class RecombinationType(Enum):
    """ Evolutionary recombination types enum """
    # Integer value so it can be built straight from config file ints (see Config)
    RANDOM_ONE_POINT_CROSSOVER = 0

    def __repr__(self):
        """ Human readable: just the member name """
        return self.name
46 |
47 |
@unique
class ReplacementType(Enum):
    """ Evolutionary replacement types enum """
    # Integer values so they can be built straight from config file ints (see Config)
    MU_PLUS_LAMBDA = 0
    MU_LAMBDA_WITH_ELITISM = 1
    MU_LAMBDA_WITHOUT_ELITISM = 2

    def __repr__(self):
        """ Human readable: just the member name """
        return self.name
58 |
59 |
60 | # Fitness types
@unique
class FitnessType(Enum):
    """ Enumerates the supported fitness function flavours """

    BASIC = 0
    FULL_MATCH = 1

    def __repr__(self) -> str:
        """ Render the member as its bare name for human-friendly output """
        return self.name
70 |
71 |
72 | #
73 | # Dynamic grammar generation related literals
74 | #
75 | # Symbol delimiters
SLD = '<'
SRD = '>'
# Grammar non-terminal symbols
S = f'{SLD}S{SRD}'
P = f'{SLD}P{SRD}'
T = f'{SLD}T{SRD}'
F = f'{SLD}F{SRD}'
# Feature symbols (base)
ORTH = f'{SLD}ORTH{SRD}'
TEXT = f'{SLD}TEXT{SRD}'
LOWER = f'{SLD}LOWER{SRD}'
LENGTH = f'{SLD}LENGTH{SRD}'
POS = f'{SLD}POS{SRD}'
TAG = f'{SLD}TAG{SRD}'
DEP = f'{SLD}DEP{SRD}'
LEMMA = f'{SLD}LEMMA{SRD}'
SHAPE = f'{SLD}SHAPE{SRD}'
ENT_TYPE = f'{SLD}ENT_TYPE{SRD}'
# Feature symbols (base boolean)
IS_ALPHA = f'{SLD}IS_ALPHA{SRD}'
IS_ASCII = f'{SLD}IS_ASCII{SRD}'
IS_DIGIT = f'{SLD}IS_DIGIT{SRD}'
IS_LOWER = f'{SLD}IS_LOWER{SRD}'
IS_UPPER = f'{SLD}IS_UPPER{SRD}'
IS_TITLE = f'{SLD}IS_TITLE{SRD}'
IS_PUNCT = f'{SLD}IS_PUNCT{SRD}'
IS_SPACE = f'{SLD}IS_SPACE{SRD}'
IS_STOP = f'{SLD}IS_STOP{SRD}'
LIKE_NUM = f'{SLD}LIKE_NUM{SRD}'
LIKE_URL = f'{SLD}LIKE_URL{SRD}'
LIKE_EMAIL = f'{SLD}LIKE_EMAIL{SRD}'
# Grammar operator and quantifier symbols
OP = f'{SLD}OP{SRD}'
NEGATION = '!'
ZERO_OR_ONE = '?'
ONE_OR_MORE = '+'
ZERO_OR_MORE = '*'
# Token wildcard
TOKEN_WILDCARD = '{}'
# Grammar extended pattern syntax
XPS = f'{SLD}XPS{SRD}'
IN = f'{SLD}IN{SRD}'
NOT_IN = f'{SLD}NOT_IN{SRD}'
EQQ = f'{SLD}EQQ{SRD}'
GEQ = f'{SLD}GEQ{SRD}'
LEQ = f'{SLD}LEQ{SRD}'
GTH = f'{SLD}GTH{SRD}'
LTH = f'{SLD}LTH{SRD}'
# Maps each extended-syntax symbol to its spaCy comparison operator
XPS_AS = {EQQ: '==', GEQ: '>=', LEQ: '<=', GTH: '>', LTH: '<'}
# Grammar custom attributes extension symbol
UNDERSCORE = f'{SLD}UNDERSCORE{SRD}'
EF = f'{SLD}EF{SRD}'
ENT_ID = f'{SLD}CUSTOM_ENT_ID_{SRD}'
ENT_IOB = f'{SLD}CUSTOM_ENT_IOB_{SRD}'
ENT_KB_ID = f'{SLD}CUSTOM_ENT_KB_ID_{SRD}'
HAS_VECTOR = f'{SLD}CUSTOM_HAS_VECTOR{SRD}'
IS_BRACKET = f'{SLD}CUSTOM_IS_BRACKET{SRD}'
IS_CURRENCY = f'{SLD}CUSTOM_IS_CURRENCY{SRD}'
IS_LEFT_PUNCT = f'{SLD}CUSTOM_IS_LEFT_PUNCT{SRD}'
IS_OOV = f'{SLD}CUSTOM_IS_OOV{SRD}'
IS_QUOTE = f'{SLD}CUSTOM_IS_QUOTE{SRD}'
IS_RIGHT_PUNCT = f'{SLD}CUSTOM_IS_RIGHT_PUNCT{SRD}'
IS_SENT_START = f'{SLD}CUSTOM_IS_SENT_START{SRD}'
LANG = f'{SLD}CUSTOM_LANG_{SRD}'
NORM = f'{SLD}CUSTOM_NORM_{SRD}'
PREFIX = f'{SLD}CUSTOM_PREFIX_{SRD}'
PROB = f'{SLD}CUSTOM_PROB{SRD}'
SENT_START = f'{SLD}CUSTOM_SENT_START{SRD}'
SENTIMENT = f'{SLD}CUSTOM_SENTIMENT{SRD}'
STRING = f'{SLD}CUSTOM_STRING{SRD}'
SUFFIX = f'{SLD}CUSTOM_SUFFIX_{SRD}'
TEXT_WITH_WS = f'{SLD}CUSTOM_TEXT_WITH_WS{SRD}'
WHITESPACE = f'{SLD}CUSTOM_WHITESPACE_{SRD}'
# Matcher's util: token attributes the spaCy Matcher natively understands
MATCHER_SUPPORTED_ATTRIBUTES = (
    'orth_',
    'text',
    'lower_',
    'pos_',
    'tag_',
    'dep_',
    'lemma_',
    'shape_',
    'ent_type_',
    'is_alpha',
    'is_ascii',
    'is_digit',
    'is_lower',
    'is_upper',
    'is_title',
    'is_punct',
    'is_space',
    'is_stop',
    'like_num',
    'like_url',
    'like_email')

#
# Config ini literals (section and option names)
#
GE = 'GE'
MAX_RUNS = 'MAX_RUNS'
SUCCESS_THRESHOLD = 'SUCCESS_THRESHOLD'
POPULATION_SIZE = 'POPULATION_SIZE'
MAX_GENERATIONS = 'MAX_GENERATIONS'
CODON_LENGTH = 'CODON_LENGTH'
CODONS_X_INDIVIDUAL = 'CODONS_X_INDIVIDUAL'
MUTATION_PROBABILITY = 'MUTATION_PROBABILITY'
OFFSPRING_FACTOR = 'OFFSPRING_FACTOR'
MATING_PROBABILITY = 'MATING_PROBABILITY'
K_VALUE = 'K_VALUE'
SELECTION_TYPE = 'SELECTION_TYPE'
RECOMBINATION_TYPE = 'RECOMBINATION_TYPE'
REPLACEMENT_TYPE = 'REPLACEMENT_TYPE'
FITNESS_FUNCTION_TYPE = 'FITNESS_FUNCTION_TYPE'
DGG = 'DGG'
FEATURES_X_TOKEN = 'FEATURES_X_TOKEN'
USE_BOOLEAN_FEATURES = 'USE_BOOLEAN_FEATURES'
USE_UNIQUES = 'USE_UNIQUES'
USE_GRAMMAR_OPERATORS = 'USE_GRAMMAR_OPERATORS'
USE_TOKEN_WILDCARD = 'USE_TOKEN_WILDCARD'
USE_EXTENDED_PATTERN_SYNTAX = 'USE_EXTENDED_PATTERN_SYNTAX'
USE_CUSTOM_ATTRIBUTES = 'USE_CUSTOM_ATTRIBUTES'
IO = 'IO'
REPORT_PATH = 'REPORT_PATH'
REPORT_FORMAT = 'REPORT_FORMAT'
202 |
203 |
@unique
class ReportFormat(Enum):
    """ Enumerates the supported execution report output formats """

    JSON = 0
    CSV = 1

    def __repr__(self) -> str:
        """ Render the member as its bare name for human-friendly output """
        return self.name
213 |
--------------------------------------------------------------------------------
/PatternOmatic/settings/log.py:
--------------------------------------------------------------------------------
1 | """ Logging module
2 |
3 | This file is part of PatternOmatic.
4 |
5 | Copyright © 2020 Miguel Revuelta Espinosa
6 |
7 | PatternOmatic is free software: you can redistribute it and/or
8 | modify it under the terms of the GNU Lesser General Public License
9 | as published by the Free Software Foundation, either version 3 of
10 | the License, or (at your option) any later version.
11 |
12 | PatternOmatic is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with PatternOmatic. If not, see .
19 |
20 | """
21 | import logging
22 | import sys
23 | import tempfile
24 | from logging.handlers import TimedRotatingFileHandler
25 |
# Single shared record layout: level, timestamp, call site, message
FORMATTER = logging.Formatter(
    '[%(levelname)s] %(asctime)s %(filename)s:%(funcName)s:%(lineno)d : %(message)s')

# Log file is placed in the platform's temporary directory
LOG_FILE = tempfile.gettempdir() + '/patternomatic.log'
30 |
31 |
def _get_console_handler():
    """
    Build a console logging handler bound to standard output

    Returns: logging.StreamHandler using the module-wide FORMATTER

    """
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(FORMATTER)
    return handler
41 |
42 |
def _get_file_handler():
    """
    Build a file logging handler that rotates the log file at midnight

    Returns: TimedRotatingFileHandler writing to LOG_FILE with the module-wide FORMATTER

    """
    handler = TimedRotatingFileHandler(LOG_FILE, when='midnight')
    handler.setFormatter(FORMATTER)
    return handler
52 |
53 |
def get_logger(logger_name):
    """
    Create a logger wired with the console and rotating-file handlers
    Args:
        logger_name: Name of the logger

    Returns: logger

    """
    configured_logger = logging.getLogger(logger_name)
    configured_logger.setLevel(logging.INFO)
    for handler in (_get_console_handler(), _get_file_handler()):
        configured_logger.addHandler(handler)
    # Stop records from bubbling to ancestor loggers (avoids duplicate output)
    configured_logger.propagate = False
    return configured_logger
69 |
70 |
71 | LOG = get_logger('PatternOmatic')
72 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # PatternOmatic 0.2.*
4 |
5 | **\#AI · \#EvolutionaryComputation · \#NLP**
6 |
7 | [](https://spacy.io)
8 | [](https://www.gnu.org/licenses/lgpl-3.0)
9 | [](https://travis-ci.org/revuel/PatternOmatic)
10 | [](https://sonarcloud.io/dashboard?id=revuel_PatternOmatic)
11 | [](https://sonarcloud.io/dashboard?id=revuel_PatternOmatic)
12 | [](https://sonarcloud.io/dashboard?id=revuel_PatternOmatic)
13 | [](#)
14 | [](https://libraries.io/pypi/PatternOmatic/sourcerank)
15 | [](https://pypistats.org/packages/PatternOmatic)
16 | [](https://badge.fury.io/py/PatternOmatic)
17 |
18 | _Discover spaCy's linguistic patterns matching a given set of string samples_
19 |
20 | ## Requirements
21 | - [Python 3.7.3](https://www.python.org/downloads/release/python-373/)
22 | - [Spacy 2.3.*](https://spacy.io/usage/v2-3)
23 |
24 | ## Basic usage
25 |
26 | ### From sources
27 | *[Clone SCM official repository](https://github.com/revuel/PatternOmatic)*
28 |
29 | `git clone git@github.com:revuel/PatternOmatic.git`
30 |
31 | *Play with Makefile*
32 |
33 | - `make venv` to activate project's [Virtual Environment*](https://docs.python.org/3.7/library/venv.html)
34 | - `make libs` to install dependencies
35 | - `make test` to run Unit Tests
36 | - `make coverage` to run Code Coverage
37 | - `make run` to run PatternOmatic's script with example parameters
38 |
\* you must have one first
40 |
41 | ### From package
42 | *Install package*
43 |
44 | `pip install PatternOmatic`
45 |
46 | *Play with the CLI*
47 |
48 | ```
49 | # Show help
50 | patternomatic.py -h
51 |
52 | # Usage example 1: Basic
53 | patternomatic.py -s Hello world -s Goodbye world
54 |
55 | # Usage example 2: Using a different language
56 | python -m spacy download es_core_news_sm
57 | patternomatic.py -s Me llamo Miguel -s Se llama PatternOmatic -l es_core_news_sm
58 | ```
59 |
60 | *Play with the library*
61 | ```
62 | """
63 | PatternOmatic library client example.
64 | Find linguistic patterns to be used by the spaCy Rule Based Matcher
65 |
66 | """
67 | from PatternOmatic.api import find_patterns, Config
68 |
69 | if __name__ == '__main__':
70 |
71 | my_samples = ['I am a cat!', 'You are a dog!', 'She is an owl!']
72 |
73 | # Optionally, let it evolve a little bit more!
74 | config = Config()
75 | config.max_generations = 150
76 | config.max_runs = 3
77 |
78 | patterns_found, _ = find_patterns(my_samples)
79 |
80 | print(f'Patterns found: {patterns_found}')
81 |
82 | ```
83 | ---
84 |
85 | ## Features
86 |
87 | ### Generic
88 |
89 | ✅ No OS dependencies, no storage or database required!
90 |
✅ Lightweight package with just a few direct pip dependencies
92 | - [spaCy](https://pypi.org/project/spacy/2.3.2/)
93 | - [spaCy's en_core_web_sm Language Model](https://github.com/explosion/spacy-models/releases/tag/en_core_web_sm-2.3.0)
94 |
95 | ✅ Easy and highly configurable to boost clever searches
96 |
97 | ✅ Includes basic logging mechanism
98 |
99 | ✅ Includes basic reporting, JSON and CSV format supported. Report file path is configurable
100 |
101 | ✅ Configuration file example provided (config.ini)
102 |
103 | ✅ Default configuration is run if no configuration file provided
104 |
105 | ✅ Provides rollback actions against several possible misconfiguration scenarios
106 |
107 | ### Evolutionary
108 |
109 | ✅ Basic Evolutionary (Grammatical Evolution) parameters available and configurable
110 |
111 | ✅ Supports two different Evolutionary Fitness functions
112 |
113 | ✅ Supports Binary Tournament Evolutionary Selection Type
114 |
115 | ✅ Supports Random One Point Crossover Evolutionary Recombination Type
116 |
117 | ✅ Supports "µ + λ" Evolutionary Replacement Type
118 |
119 | ✅ Supports "µ ∪ λ" with elitism Evolutionary Replacement Type
120 |
121 | ✅ Supports "µ ∪ λ" without elitism Evolutionary Replacement Type
122 |
123 | ✅ Typical evolutionary performance metrics included:
124 | - Success Rate (SR)
125 | - Mean Best Fitness (MBF)
126 | - Average Evaluations to Solution (AES)
127 |
128 | ### Linguistic
129 |
130 | ✅ [Compatible with any spaCy Language Model](https://spacy.io/usage/models#languages)
131 |
132 | ✅ [Supports all spaCy's Rule Based Matcher standard Token attributes](https://spacy.io/usage/rule-based-matching#adding-patterns-attributes)
133 |
134 | ✅ [Supports the following spaCy's Rule Based Matcher non standard Token attributes](https://spacy.io/api/token#attributes) [(via underscore)](https://spacy.io/usage/processing-pipelines#custom-components-attributes)
135 | - ent_id
136 | - ent_iob
137 | - ent_kb_id
138 | - has_vector
139 | - is_bracket
140 | - is_currency
141 | - is_left_punct
142 | - is_oov
143 | - is_quote
144 | - is_right_punct
145 | - lang
146 | - norm
147 | - prefix
148 | - sentiment
149 | - string
150 | - suffix
151 | - text_with_ws
152 | - whitespace
153 |
154 | ✅ Supports skipping boolean Token attributes
155 |
156 | ✅ [Supports spaCy's Rule Based Matcher Extended Pattern Syntax](https://spacy.io/usage/rule-based-matching#adding-patterns-attributes-extended)
157 |
158 | ✅ [Supports spaCy's Rule Based Matcher Grammar Operators and Quantifiers](https://spacy.io/usage/rule-based-matching#quantifiers)
159 |
160 | ✅ [Supports Token Wildcard](https://spacy.io/usage/rule-based-matching#adding-patterns-wildcard)
161 |
162 | ✅ Supports defining the number of attributes per token within searched patterns
163 |
164 | ✅ Supports usage of non repeated token attribute values
165 |
166 | ---
167 |
Author: [Miguel Revuelta Espinosa _(revuel)_](mailto:revuel22@hotmail.com "Contact author"), a humble AI enthusiast
169 |
--------------------------------------------------------------------------------
/config.ini:
--------------------------------------------------------------------------------
1 | # - Configuration File example for "PatternOmatic"
2 | #
3 | # Grammatical Evolution (GE) parameters
4 | #
5 | [GE]
6 | # Number of runs per execution. This is the amount of new populations to spawn per execution. Integer greater than 0
7 | # Integer within interval [3, *)
8 | MAX_RUNS = 4
9 |
10 | # Minimum fitness value found in an execution to consider this a successful execution.
11 | # Float within interval [0.0, 1.0]
12 | SUCCESS_THRESHOLD = 0.8
13 |
14 | # Number of individuals per population
15 | # Integer within interval [4, *)
16 | POPULATION_SIZE = 100
17 |
18 | # Maximum number of generations per population in a run.
19 | # Integer within interval [1, *)
20 | MAX_GENERATIONS = 20
21 |
22 | # Number of gen per codon
23 | # Integer within interval [1, 16]
24 | CODON_LENGTH = 8
25 |
26 | # Number of codons per individual dna
27 | # Integer within the set (4, 8, 16)
28 | CODONS_X_INDIVIDUAL = 4
29 |
# Mutation probability. Chance of mutating a gen at an individual's birth, applied to every gen
31 | # Float within interval [0.0, 1.0]
32 | MUTATION_PROBABILITY = 0.5
33 |
34 | # Growth factor while generating offspring.
35 | # This factor appears in the literature. Do not edit this value.
36 | OFFSPRING_FACTOR = 3.5
37 |
38 | # Chances to produce offspring per individuals selection
39 | # Float within interval [0.0, 1.0]
40 | MATING_PROBABILITY = 0.9
41 |
# Number of individuals to compete when K_TOURNAMENT is the selection mode
43 | # Integer within interval [3, *)
44 | K_VALUE = 3
45 |
46 | # Selection type:
47 | # 0 = BINARY_TOURNAMENT
48 | # 1 = K_TOURNAMENT
49 | SELECTION_TYPE = 0
50 |
51 | # Recombination type:
52 | # 0 = RANDOM_ONE_POINT_CROSSOVER
53 | RECOMBINATION_TYPE = 0
54 |
55 | # Replacement type:
56 | # 0 = MU_PLUS_LAMBDA
57 | # 1 = MU_LAMBDA_WITH_ELITISM
58 | # 2 = MU_LAMBDA_WITHOUT_ELITISM
59 | REPLACEMENT_TYPE = 0
60 |
61 | # Fitness function type:
62 | # 0 = BASIC
63 | # 1 = FULL_MATCH
64 | FITNESS_FUNCTION_TYPE = 1
65 |
66 | #
67 | # Dynamic Grammar Generation (DGG) parameters
68 | #
69 | [DGG]
70 | # Features per token:
71 | # 0 or < 0 = unlimited
72 | # 1 or more until the maximum number of features = that number of features per token
73 | # > maximum number of features per token = the maximum number of features per token
74 | # 1 is the recommended value here
75 | FEATURES_X_TOKEN = 1
76 |
77 | # Use uniques:
78 | # True = Do not repeat features per production rule
79 | # False = Features can be repeated per production rule
80 | USE_UNIQUES = True
81 |
82 | # Use boolean features:
83 | # True = Enable the usage of Spacy's boolean token features (not recommended)
84 | # False = Disable the usage of Spacy's boolean token features (recommended)
# These features show a highly positive correlation, which means they are not useful for finding patterns
86 | USE_BOOLEAN_FEATURES = False
87 |
88 | # Use Grammar Operators:
89 | # True = Enable patterns with Spacy's Grammar Operators
90 | # False = Disable patterns with Spacy's Grammar Operators
91 | # Grammar Operators and Extended Pattern Syntax can not be enabled together
92 | USE_GRAMMAR_OPERATORS = False
93 |
94 | # Use Token Wildcard:
95 | # True = Enable patterns with Token Wildcard
96 | # False = Disable patterns with Token Wildcard
97 | USE_TOKEN_WILDCARD = False
98 |
99 | # Use Extended Pattern Syntax:
100 | # True = Enable patterns with Spacy's Extended Pattern Syntax
101 | # False = Disable patterns with Spacy's Extended Pattern Syntax
102 | # Grammar Operators and Extended Pattern Syntax can not be enabled together
103 | USE_EXTENDED_PATTERN_SYNTAX = False
104 |
105 | # Use Custom Features:
106 | # True = Enable patterns with underscore, where all the token's attributes not accepted by the Matcher are included
107 | # False = Disable patterns with underscore, where all the token's attributes not accepted by the Matcher are included
108 | USE_CUSTOM_ATTRIBUTES = False
109 |
110 | #
111 | # Operating System (OS) configuration options
112 | #
113 | [IO]
114 | # Valid OS path and filename to persist execution report
115 | REPORT_PATH = /tmp/patternOmatic_report.txt
116 |
117 | # Report format
118 | # 0 = json format
119 | # 1 = csv format
120 | REPORT_FORMAT = 0
121 |
--------------------------------------------------------------------------------
/patternomatic_logo.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
225 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | setuptools==40.8.0
2 | pip==20.2.3
3 | coverage==5.0.3
4 | wheel==0.33.6
5 | importlib-metadata==2.0.0
6 | twine==3.2.0
7 | spacy==2.3.*
8 | https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.0/en_core_web_sm-2.3.0.tar.gz#egg=en_core_web_sm
9 |
--------------------------------------------------------------------------------
/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/revuel/PatternOmatic/8f95c1c134a14419a11b8cb192144857b40d0b3c/scripts/__init__.py
--------------------------------------------------------------------------------
/scripts/patternomatic.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | """ Command Line Interface module
3 |
4 | This file is part of PatternOmatic.
5 |
6 | Copyright © 2020 Miguel Revuelta Espinosa
7 |
8 | PatternOmatic is free software: you can redistribute it and/or
9 | modify it under the terms of the GNU Lesser General Public License
10 | as published by the Free Software Foundation, either version 3 of
11 | the License, or (at your option) any later version.
12 |
13 | PatternOmatic is distributed in the hope that it will be useful,
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | GNU Lesser General Public License for more details.
17 |
18 | You should have received a copy of the GNU Lesser General Public License
19 | along with PatternOmatic. If not, see .
20 |
21 | """
22 | import sys
23 | from typing import List
24 | from argparse import ArgumentParser
25 | from PatternOmatic.api import find_patterns
26 | from PatternOmatic.settings.log import LOG
27 |
28 |
def main(args: List) -> None:
    """
    PatternOmatic's script main function wrapper
    Args:
        args: Command Line Input Arguments

    Returns: None

    """
    LOG.info('Parsing command line arguments...')
    try:
        parser = ArgumentParser(
            description="Finds the Spacy's Matcher pattern for the given samples",
            epilog='...using actual Artificial Intelligence',
        )

        # Samples: the flag may be repeated, each sample given as words
        parser.add_argument(
            '-s',
            '--sample',
            action='append',
            required=True,
            nargs='+',
            type=str,
            help='A sample phrase',
        )

        # spaCy language model to load
        parser.add_argument(
            '-l',
            '--language',
            nargs='?',
            type=str,
            default='en_core_web_sm',
            help='Spacy language model to be used',
        )

        # Optional configuration file
        parser.add_argument(
            '-c',
            '--config',
            nargs='?',
            type=str,
            default=None,
            help='Configuration file path to be used',
        )

        parsed_args = parser.parse_args(args)

        # Each sample arrives as a list of words; fold it back into one phrase
        parsed_args.sample = [' '.join(words) for words in parsed_args.sample]

        #
        # Find patterns
        #
        patterns_found, _ = find_patterns(
            parsed_args.sample,
            configuration=parsed_args.config,
            spacy_language_model_name=parsed_args.language)

        LOG.info(f'Patterns found: {patterns_found}')

    except Exception as ex:
        LOG.critical(f'Fatal error: {repr(ex)}')
        raise ex
96 |
97 |
98 | #
99 | # OS INPUT
100 | #
if __name__ == '__main__':
    main(sys.argv[1:])
103 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """ Setup tools (build distribution) module
2 |
3 | This file is part of PatternOmatic.
4 |
5 | Copyright © 2020 Miguel Revuelta Espinosa
6 |
7 | PatternOmatic is free software: you can redistribute it and/or
8 | modify it under the terms of the GNU Lesser General Public License
9 | as published by the Free Software Foundation, either version 3 of
10 | the License, or (at your option) any later version.
11 |
12 | PatternOmatic is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with PatternOmatic. If not, see .
19 |
20 | """
21 | import setuptools
22 |
# README contains non-ASCII characters (emoji, Greek letters), so the
# encoding must be explicit: relying on the platform default breaks the
# build on systems where it is not UTF-8 (e.g. Windows cp1252).
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setuptools.setup(
    name="PatternOmatic",
    version="0.2.3",
    author="Miguel Revuelta Espinosa",
    author_email="revuel22@hotmail.com",
    description="AI/NLP (Spacy) Rule Based Matcher pattern finder",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/revuel/PatternOmatic",
    packages=setuptools.find_packages(),
    scripts=['scripts/patternomatic.py'],
    install_requires=[
        'spacy==2.3.0'
    ],
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.7',
)
47 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/revuel/PatternOmatic/8f95c1c134a14419a11b8cb192144857b40d0b3c/tests/__init__.py
--------------------------------------------------------------------------------
/tests/test_api.py:
--------------------------------------------------------------------------------
1 | """ Unit testing file for API module
2 |
3 | This file is part of PatternOmatic.
4 |
5 | Copyright © 2020 Miguel Revuelta Espinosa
6 |
7 | PatternOmatic is free software: you can redistribute it and/or
8 | modify it under the terms of the GNU Lesser General Public License
9 | as published by the Free Software Foundation, either version 3 of
10 | the License, or (at your option) any later version.
11 |
12 | PatternOmatic is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with PatternOmatic. If not, see .
19 |
20 | """
21 | import os
22 | import spacy
23 | from unittest import TestCase, mock
24 | from PatternOmatic.api import find_patterns
25 | from PatternOmatic.settings.config import Config
26 | from PatternOmatic.settings.log import LOG
27 |
28 |
class Test(TestCase):
    """ Unit tests for the public API entry point (find_patterns) """

    my_samples = ['Hello world!', 'Goodbye world!']

    def test_find_patterns_when_only_samples_provided(self):
        """ Tests that providing just samples makes the find_pattern keeps working """
        patterns, _ = find_patterns(self.my_samples)
        # Default configuration performs 4 runs, so 4 patterns are expected
        self.assertEqual(4, len(patterns))

    def test_find_patterns_when_valid_configuration_file_provided(self):
        """ Checks that providing a valid configuration file path loads configuration from that file """

        config_file_path = \
            os.path.join(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir), 'config.ini')
        _ = find_patterns(self.my_samples, configuration=config_file_path)
        self.assertEqual(config_file_path, Config().file_path)

    def test_find_patterns_when_config_instance_provided(self):
        """ Checks when setting up a Config instance before find_patterns invocation works """
        config = Config()
        config.max_runs = 10
        patterns, _ = find_patterns(self.my_samples)
        self.assertEqual(10, len(patterns))

    def test_find_patterns_when_bad_language_provided(self):
        """ Checks that providing an imaginary language model makes find_patterns use en_core_web_sm """
        with self.assertLogs(LOG) as cm:
            bad_model = 'Something'
            _ = find_patterns(self.my_samples, spacy_language_model_name=bad_model)
            self.assertEqual(f'WARNING:PatternOmatic:Model {bad_model} not found, falling back to '
                             f'patternOmatic\'s default language model: en_core_web_sm', cm.output[1])

    def test_installs_en_core_web_sm_if_not_found(self):
        """ Due to questionable PyPI security policies, check en_core_web_sm installation is fired if not present """
        nlp = spacy.load('en_core_web_sm')

        with mock.patch('PatternOmatic.api.pkg_resources.working_set') as patch_working_set:
            with mock.patch('PatternOmatic.api.spacy_download') as patch_spacy_download:
                with mock.patch('PatternOmatic.api.spacy_load') as patch_spacy_load:
                    patch_working_set.return_value = []
                    patch_spacy_download.return_value = 'I\'ve been fired'
                    patch_spacy_load.return_value = nlp
                    find_patterns(['Hi'])
                    self.assertTrue(patch_spacy_download.called)

    def tearDown(self) -> None:
        """ Destroy Config instance """
        Config.clear_instance()
77 |
--------------------------------------------------------------------------------
/tests/test_bnf.py:
--------------------------------------------------------------------------------
1 | """ Unit testing file for BNF module
2 |
3 | This file is part of PatternOmatic.
4 |
5 | Copyright © 2020 Miguel Revuelta Espinosa
6 |
7 | PatternOmatic is free software: you can redistribute it and/or
8 | modify it under the terms of the GNU Lesser General Public License
9 | as published by the Free Software Foundation, either version 3 of
10 | the License, or (at your option) any later version.
11 |
12 | PatternOmatic is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with PatternOmatic. If not, see .
19 |
20 | """
21 | import unittest
22 | import spacy
23 | from spacy.tokens.doc import Underscore
24 |
25 | import PatternOmatic.nlp.bnf as bnf
26 | from PatternOmatic.settings.literals import S, P, T, F, OP, NEGATION, ZERO_OR_ONE, ZERO_OR_MORE, ONE_OR_MORE, XPS, IN,\
27 | NOT_IN, EQQ, GEQ, LEQ, GTH, LTH, TOKEN_WILDCARD, UNDERSCORE, ORTH, TEXT, LOWER, POS, TAG, DEP, LEMMA, SHAPE, \
28 | IS_ASCII, IS_UPPER, HAS_VECTOR
29 | from PatternOmatic.settings.config import Config
30 |
31 |
32 | class TestDG(unittest.TestCase):
33 | """ Test class for Dynamic Grammar """
34 |
35 | nlp = spacy.load('en_core_web_sm')
36 | samples = [nlp(u'This is a test.'), nlp(u'Checks for Backus Naur Form grammars')]
37 | config = None
38 |
39 | def test_basic_grammar_dg(self):
40 | """ Tests that basic grammar is correctly generated """
41 | grammar = bnf.dynamic_generator(self.samples)
42 |
43 | super().assertIn(P, grammar.keys())
44 | super().assertIn(S, grammar.keys())
45 | super().assertIn(T, grammar.keys())
46 | super().assertIn(F, grammar.keys())
47 | super().assertEqual(len(grammar[SHAPE]), 7)
48 | super().assertEqual(len(grammar[F]), 9)
49 |
50 | def test_basic_grammar_without_uniques_dg(self):
51 | """ Tests that basic grammar is correctly generated when use uniques is false """
52 | self.config.use_uniques = False
53 | grammar = bnf.dynamic_generator(self.samples)
54 |
55 | super().assertEqual(len(grammar[SHAPE]), 11)
56 |
57 | def test_basic_grammar_with_booleans_dg(self):
58 | """ Tests that basic grammar with booleans is correctly generated """
59 | self.config.use_boolean_features = True
60 | grammar = bnf.dynamic_generator(self.samples)
61 |
62 | super().assertIn(IS_ASCII, grammar.keys())
63 | super().assertIn(IS_UPPER, grammar.keys())
64 |
65 | def test_basic_grammar_with_booleans_and_operators_dg(self):
66 | """ Tests that basic grammar with boolean features and operators is correctly generated """
67 | self.config.use_boolean_features = True
68 | self.config.use_grammar_operators = True
69 |
70 | grammar = bnf.dynamic_generator(self.samples)
71 |
72 | super().assertIn(IS_ASCII, grammar.keys())
73 | super().assertIn(IS_UPPER, grammar.keys())
74 | super().assertIn(OP, grammar.keys())
75 | super().assertListEqual(grammar[OP], [NEGATION, ZERO_OR_ONE, ONE_OR_MORE, ZERO_OR_MORE])
76 |
77 | def test_basic_grammar_with_booleans_and_extended_pattern_syntax_dg(self):
78 | """ Tests that basic grammar with boolean features and extended pattern syntax is correctly generated """
79 | self.config.use_boolean_features = True
80 | self.config.use_extended_pattern_syntax = True
81 |
82 | grammar = bnf.dynamic_generator(self.samples)
83 |
84 | super().assertIn(IS_ASCII, grammar.keys())
85 | super().assertIn(IS_UPPER, grammar.keys())
86 | super().assertIn(XPS, grammar.keys())
87 | super().assertListEqual(grammar[XPS], [IN, NOT_IN, EQQ, GEQ, LEQ, GTH, LTH])
88 |
89 | def test_basic_grammar_with_booleans_and_custom_attributes_dg(self):
90 | """ Tests that basic grammar with boolean features and custom attributes is correctly generated """
91 | self.config.use_boolean_features = True
92 | self.config.use_custom_attributes = True
93 |
94 | grammar = bnf.dynamic_generator(self.samples)
95 |
96 | super().assertIn(IS_ASCII, grammar.keys())
97 | super().assertIn(IS_UPPER, grammar.keys())
98 | super().assertIn(UNDERSCORE, grammar.keys())
99 | # super().assertIn(IS_SENT_START, grammar.keys())
100 | super().assertIn(HAS_VECTOR, grammar.keys())
101 |
102 | def test_basic_grammar_with_token_wildcard_dg(self):
103 | """ Tests grammar is generated with token wildcard """
104 | self.config.use_token_wildcard = True
105 |
106 | grammar = bnf.dynamic_generator(self.samples)
107 |
108 | super().assertIn(TOKEN_WILDCARD, grammar[T])
109 |
110 | def test_get_features_per_token(self):
111 | """ Tests that the number of features per token is properly set given different configurations """
112 | features_dict = {ORTH: None, TEXT: None, LOWER: None, POS: None, TAG: None, LEMMA: None}
113 | len_features_dict = len(features_dict.keys())
114 |
115 | # When features_per_token is equal or lower to 0, the maximum number of features per token is set
116 | self.config.features_per_token = 0
117 | super().assertEqual(len_features_dict, bnf._get_features_per_token(features_dict))
118 | self.config.features_per_token = -100
119 | super().assertEqual(len_features_dict, bnf._get_features_per_token(features_dict))
120 |
121 | # When features_per_token is greater than the actual features, the maximum number of features per token is set
122 | self.config.features_per_token = 100
123 | super().assertEqual(len_features_dict, bnf._get_features_per_token(features_dict))
124 |
125 | # When features_per_token is inside the range (0, actual features), the config parameter is respected
126 | self.config.features_per_token = 3
127 | super().assertEqual(3, bnf._get_features_per_token(features_dict))
128 |
129 | def test_symbol_stacker(self):
130 | """ Tests that symbols are stacked properly """
131 | expected_1 = [DEP, DEP + ',' + DEP, DEP + ',' + DEP + ',' + DEP]
132 | super().assertListEqual(expected_1, bnf._symbol_stacker(DEP, 3))
133 |
134 | expected_2 = [DEP + ',' + DEP,
135 | DEP + ',' + DEP + ',' + DEP,
136 | DEP + ',' + DEP + ',' + DEP + ',' + DEP]
137 |
138 | super().assertListEqual(expected_2, bnf._symbol_stacker(DEP, 4, 2))
139 |
140 | expected_2.insert(0, DEP)
141 |
142 | super().assertListEqual(expected_2, bnf._symbol_stacker(DEP, 4, 5))
143 |
144 | super().assertListEqual([expected_1[2]], bnf._symbol_stacker(DEP, 3, 3))
145 |
146 | #
147 | # Helpers
148 | #
    def setUp(self) -> None:
        """ Fresh Config instance """
        # Config is a singleton; tearDown clears it, so each test starts
        # from a freshly built configuration
        self.config = Config()
152 |
    def tearDown(self) -> None:
        """ Destroy Config instance, reset Underscore's token extensions """
        # Drop the singleton so the next test's Config() builds a fresh instance
        Config.clear_instance()
        # Custom token extensions registered during a test must not leak into the next one
        Underscore.token_extensions = {}
157 |
158 |
# Allow running this test module directly (e.g. `python tests/test_bnf.py`)
if __name__ == "__main__":
    unittest.main()
161 |
--------------------------------------------------------------------------------
/tests/test_individual.py:
--------------------------------------------------------------------------------
1 | """ Unit testing module for GE Individual module
2 |
3 | This file is part of PatternOmatic.
4 |
5 | Copyright © 2020 Miguel Revuelta Espinosa
6 |
7 | PatternOmatic is free software: you can redistribute it and/or
8 | modify it under the terms of the GNU Lesser General Public License
9 | as published by the Free Software Foundation, either version 3 of
10 | the License, or (at your option) any later version.
11 |
12 | PatternOmatic is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with PatternOmatic. If not, see .
19 |
20 | """
21 | import unittest
22 | import spacy
23 |
24 | from PatternOmatic.ge.stats import Stats
25 | from PatternOmatic.nlp.bnf import dynamic_generator as dgg
26 | from PatternOmatic.ge.individual import Individual, Fitness
27 | from PatternOmatic.settings.config import Config
28 | from PatternOmatic.settings.literals import FitnessType, S, P, T, F, ORTH, TOKEN_WILDCARD, UNDERSCORE, IS_CURRENCY, \
29 | NOT_IN, ZERO_OR_MORE, OP, GTH, XPS, IN
30 |
31 |
class TestIndividual(unittest.TestCase):
    """ Unit Test class for GE Individual object """
    # Shared, expensive fixtures built once at class-definition time
    config = Config()

    nlp = spacy.load("en_core_web_sm")

    # Sample Docs the grammar is induced from and individuals are evaluated against
    samples = [nlp(u'I am a raccoon!'),
               nlp(u'You are a cat!'),
               nlp(u'Is she a rabbit?'),
               nlp(u'This is a test')]

    # BNF grammar dynamically generated from the samples above
    grammar = dgg(samples)

    stats = Stats()

    def test_init(self):
        """ Test that Individual instantiation works """
        i = Individual(self.samples, self.grammar, self.stats)
        super().assertIs(type(i), Individual)

    def test_init_with_dna(self):
        """ Test that Individual instantiation works when providing dna"""
        i = Individual(self.samples, self.grammar, self.stats, '10101010101010101010101010101010')
        super().assertNotEqual(i, None)

    def test_transcription(self):
        """ Check for transcription idempotency """
        # Mutation disabled so the genotype stays exactly the dna provided
        self.config.mutation_probability = 0.0
        i = Individual(self.samples, self.grammar, self.stats, '11111111')
        # Repeated transcriptions must not change the integer genotype
        i._transcription()
        i._transcription()
        i._transcription()

        super().assertListEqual(i.int_genotype, [127, 1])

    def test_translation(self):
        """ Check for translation idempotency """
        self.config.mutation_probability = 0.0
        i = Individual(self.samples, self.grammar, self.stats, '11111111')
        # Repeated translations must not change the fenotype
        i._translation()
        i._translation()
        i._translation()
        super().assertListEqual(
            i.fenotype, [{'TEXT': 'am'}, {'TEXT': '?'}, {'TEXT': 'am'}, {'TEXT': '?'}, {'TEXT': 'am'}])

    def test_mutation(self):
        """ Checks that mutation works """
        # With probability 1.0 every gene mutates, so the genotype cannot survive intact
        self.config.mutation_probability = 1.0
        i = Individual(self.samples, self.grammar, self.stats, '11111111')
        super().assertNotEqual(i.bin_genotype, '11111111')

    def test_fitness_basic(self):
        """ Fitness "basic" sets fitness """
        self.config.mutation_probability = 0.0
        self.config.fitness_function_type = FitnessType.BASIC
        # This fixed dna is known to match 1 of the 4 samples -> fitness 0.25
        i = Individual(self.samples, self.grammar, self.stats, '01110101100101100110010110010101')

        super().assertEqual(i.fitness_value, 0.25)

    def test_fitness_full_match(self):
        """ Fitness "full match" sets fitness """
        self.config.mutation_probability = 0.0
        self.config.fitness_function_type = FitnessType.FULL_MATCH
        # This fixed dna is known to fully match 1 of the 4 samples -> fitness 0.25
        i = Individual(self.samples, self.grammar, self.stats, '01101010100001101000110111000100')

        super().assertEqual(i.fitness_value, 0.25)

    def test_token_wildcard_penalty(self):
        """ Checks that token wildcard penalty is properly set """
        # When using token wildcard, penalty is applied
        # __new__ bypasses Fitness.__init__ so attributes can be set directly
        f = object.__new__(Fitness)
        f.fenotype = [{}, {}, {}, 'Whatever']
        self.config.use_token_wildcard = True
        f.config = self.config
        super().assertEqual(0.25, f._wildcard_penalty(1.0))

        # When not using token wildcard, penalty is not applied
        self.config.use_token_wildcard = False
        f.fenotype = 1.0
        super().assertEqual(1.0, f._wildcard_penalty(1.0))

    def test_translate(self):
        """ Verifies conversions over the BNF are done correctly """
        # Bare Individual shell: only `grammar` is needed by _translate
        i = object.__new__(Individual)

        # Root
        i.grammar = {S: [P]}
        super().assertEqual('"S":""', i._translate(0, S, S))

        # Pattern root symbol to Token symbol
        i.grammar = {P: [T]}
        super().assertEqual(T, i._translate(0, P, P))

        # Token symbol to Feature symbol inside Token
        i.grammar = {T: [F]}
        super().assertEqual('{}', i._translate(0, T, T))

        # Token symbol to wildcard
        i.grammar = {T: [TOKEN_WILDCARD]}
        super().assertEqual('{}', i._translate(0, T, T))

        # Feature symbol to specific symbol
        i.grammar = {F: [ORTH]}
        super().assertEqual('{}', i._translate(0, F, '{}'))

        # Basic Terminal conversion
        i.grammar = {ORTH: ['Test']}
        super().assertEqual('{"ORTH":"Test"}', i._translate(0, ORTH, '{}'))

        # Underscore conversion
        i.grammar = {UNDERSCORE: [IS_CURRENCY]}
        super().assertEqual('{"_": {}}', i._translate(0, UNDERSCORE, '{}'))

        # Underscore terminal conversion
        i.grammar = {IS_CURRENCY: [True]}
        super().assertEqual('{"_": {"CUSTOM_IS_CURRENCY":"True"}}',
                           i._translate(0, IS_CURRENCY, '{"_": {}}'))

        # Grammar Operators conversion
        # NOTE(review): the value here is not wrapped in a list, unlike the
        # other productions ({OP: ZERO_OR_MORE} vs {XPS: [IN]}) — confirm intended
        i.grammar = {OP: ZERO_OR_MORE}
        super().assertEqual('"OP":"*"', i._translate(0, OP, ''))

        # Extended Pattern Syntax conversion (base)
        i.grammar = {XPS: [IN]}
        super().assertEqual('{}', i._translate(0, XPS, ''))

        i.grammar = {ORTH: [XPS]}
        super().assertEqual('"ORTH":', i._translate(0, ORTH, ''))

        # Extended Pattern Syntax conversion (terminal logical)
        i.grammar = {NOT_IN: [['Test']]}
        super().assertEqual('{"ORTH": {"NOT_IN":["Test"]}}', i._translate(0, NOT_IN, '{"ORTH": {}}'))

        # Extended Pattern Syntax (terminal arithmetical)
        i.grammar = {GTH: [5]}
        super().assertEqual('{"LENGTH": {">":5}}', i._translate(0, GTH, '{"LENGTH": {}}'))

    #
    # Helpers
    #
    def setUp(self) -> None:
        """ Fresh Config instance """
        # Config is a singleton; tearDown clears it between tests
        self.config = Config()

    def tearDown(self) -> None:
        """ Destroy Config instance """
        Config.clear_instance()
179 |
180 |
# Allow running this test module directly (e.g. `python tests/test_individual.py`)
if __name__ == "__main__":
    unittest.main()
183 |
--------------------------------------------------------------------------------
/tests/test_population.py:
--------------------------------------------------------------------------------
1 | """ Unit testing module for GE Population module
2 |
3 | This file is part of PatternOmatic.
4 |
5 | Copyright © 2020 Miguel Revuelta Espinosa
6 |
7 | PatternOmatic is free software: you can redistribute it and/or
8 | modify it under the terms of the GNU Lesser General Public License
9 | as published by the Free Software Foundation, either version 3 of
10 | the License, or (at your option) any later version.
11 |
12 | PatternOmatic is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with PatternOmatic. If not, see .
19 |
20 | """
21 | import unittest
22 | import spacy
23 |
24 | from PatternOmatic.ge.stats import Stats
25 | from PatternOmatic.nlp.bnf import dynamic_generator as dgg
26 | from PatternOmatic.ge.population import Population, Selection, Recombination, Replacement
27 | from PatternOmatic.ge.individual import Individual
28 | from PatternOmatic.settings.config import Config
29 | from PatternOmatic.settings.literals import FitnessType, SelectionType, RecombinationType, ReplacementType
30 |
31 |
class BasePopulationTest(unittest.TestCase):
    """ Base class to supply shared attributes and helpers """
    #
    # Shared attributes
    #
    # Expensive fixtures built once and shared by every subclass
    config = Config()

    nlp = spacy.load("en_core_web_sm")

    # Sample Docs used both for grammar induction and individual evaluation
    samples = [nlp(u'I am a raccoon!'),
               nlp(u'You are a cat!'),
               nlp(u'Is she a rabbit?'),
               nlp(u'This is a test')]

    grammar = dgg(samples)

    stats = Stats()

    #
    # Helpers
    #
    def setUp(self) -> None:
        """ Fresh Config instance """
        # Config is a singleton; tearDown clears it between tests
        self.config = Config()

    def tearDown(self) -> None:
        """ Destroy Config instance """
        Config.clear_instance()
60 |
61 |
class TestPopulation(BasePopulationTest):
    """ Unit Test class for GE Population object """

    def test_initialize(self):
        """ Tests that a population is correctly filled with Individuals """
        p = Population(self.samples, self.grammar, self.stats)

        super().assertIsInstance(p.generation[0], Individual)

    def test_best_challenge(self):
        """ Tests that the most fitted individual occupies the population's best_individual slot """
        self.config.max_generations = 3
        self.config.fitness_function_type = FitnessType.BASIC
        p = Population(self.samples, self.grammar, self.stats)
        # Plant a known-fit individual without mutation, then let evolution run with mutation
        self.config.mutation_probability = 0.0
        p.generation[0] = Individual(self.samples, self.grammar, self.stats, '01110101100101100110010110010101')
        self.config.mutation_probability = 0.5
        p.evolve()

        super().assertGreaterEqual(p.best_individual.fitness_value, 0.2)

    def test_binary_tournament(self):
        """ Test that binary tournament works as expected """
        self.config.max_generations = 3
        self.config.fitness_function_type = FitnessType.FULL_MATCH
        self.config.selection_type = SelectionType.BINARY_TOURNAMENT
        p = Population(self.samples, self.grammar, self.stats)
        mating_pool = p.selection(p.generation)

        # Selection must build a new pool rather than return the generation itself
        super().assertNotEqual(p.generation, mating_pool)

    def test_k_tournament(self):
        """ Test that k tournament raises error """
        # K tournament is declared but not implemented yet
        self.config.selection_type = SelectionType.K_TOURNAMENT
        p = Population(self.samples, self.grammar, self.stats)
        with super().assertRaises(NotImplementedError):
            _ = p.selection(p.generation)

    def test_random_one_point_crossover(self):
        """ Test that crossover 'random one point' works as expected """
        self.config.max_generations = 3
        self.config.fitness_function_type = FitnessType.BASIC
        self.config.selection_type = SelectionType.BINARY_TOURNAMENT
        self.config.recombination_type = RecombinationType.RANDOM_ONE_POINT_CROSSOVER
        p = Population(self.samples, self.grammar, self.stats)
        mating_pool = p.selection(p.generation)
        p.offspring = p.recombination(mating_pool, p.generation)
        # Offspring must differ from the parent generation
        super().assertNotEqual(p.generation, p.offspring)

    def test_mu_plus_lambda(self):
        """ Tests that replacement 'mu plus lambda' works as expected """
        self.config.replacement_type = ReplacementType.MU_PLUS_LAMBDA
        p = Population(self.samples, self.grammar, self.stats)
        mating_pool = p.selection(p.generation)
        p.offspring = p.recombination(mating_pool, p.generation)
        p.generation, p.offspring = p.replacement(p.generation, p.offspring)
        # Replacement consumes the offspring list entirely
        super().assertListEqual(p.offspring, [])

    def test_mu_lambda_elite(self):
        """ Tests that replacement 'mu lambda with elitism' works as expected """
        self.config.replacement_type = ReplacementType.MU_LAMBDA_WITH_ELITISM
        p = Population(self.samples, self.grammar, self.stats)
        mating_pool = p.selection(p.generation)
        p.offspring = p.recombination(mating_pool, p.generation)
        p.generation, p.offspring = p.replacement(p.generation, p.offspring)
        # Replacement consumes the offspring list entirely
        super().assertListEqual(p.offspring, [])

    def test_mu_lambda_no_elite(self):
        """ Tests that replacement 'mu lambda without elitism' works as expected """
        self.config.replacement_type = ReplacementType.MU_LAMBDA_WITHOUT_ELITISM
        p = Population(self.samples, self.grammar, self.stats)
        mating_pool = p.selection(p.generation)
        p.offspring = p.recombination(mating_pool, p.generation)
        p.generation, p.offspring = p.replacement(p.generation, p.offspring)
        # Replacement consumes the offspring list entirely
        super().assertListEqual(p.offspring, [])

    def test_evolve(self):
        """ Tests that an evolution works, preserving a fitted individual """
        self.config.max_generations = 3
        self.config.fitness_function_type = FitnessType.BASIC
        p = Population(self.samples, self.grammar, self.stats)
        # Plant a known-fit individual (mutation off so its dna survives construction)
        self.config.mutation_probability = 0.0
        p.generation[0] = Individual(self.samples, self.grammar, self.stats, '01110101100101100110010110010101')
        self.config.mutation_probability = 0.5
        p.evolve()
        super().assertLessEqual(0.25, p.generation[0].fitness_value)

    def test_best_challenge_changes_best_individual(self):
        """ Covers best challenge cases """
        self.config.mutation_probability = 0.0
        self.config.fitness_function_type = FitnessType.BASIC

        p = Population(self.samples, self.grammar, self.stats)
        # i1 is known to be less fit than i2 (all-zero dna vs a fit dna)
        i1 = Individual(self.samples, self.grammar, self.stats, dna='00000000000000000000000000000000')
        i2 = Individual(self.samples, self.grammar, self.stats, dna='01110101100101100110010110010101')

        # When there's no best individual yet, population's best individual is updated
        p.best_individual = None
        p.generation = [i2]
        p._best_challenge()

        super().assertEqual(p.best_individual, p.generation[0])

        # When a better individual is better fitted in a new generation, population's best individual is updated
        p.best_individual = i1
        p.generation = [i2]
        p._best_challenge()

        super().assertEqual(p.best_individual, p.generation[0])

        # When a worse individual is the most fitted in a new generation, population's best individual remains the same
        p.best_individual = i2
        p.generation = [i1]
        p._best_challenge()

        super().assertEqual(i2, p.best_individual)

    def test_sr_update(self):
        """ Check SR is updated if a solution is found for the run """
        # Local Stats so the accumulator starts empty for this test
        stats = Stats()

        self.config.max_generations = 1
        self.config.population_size = 3
        self.config.fitness_function_type = FitnessType.BASIC
        self.config.mutation_probability = 0.0

        # Threshold 0.0: any individual counts as a solution -> success recorded
        self.config.success_threshold = 0.0
        p = Population(self.samples, self.grammar, stats)
        p.generation[0] = Individual(self.samples, self.grammar, stats, '01110101100101100110010110010101')
        p.evolve()
        super().assertListEqual([True], stats.success_rate_accumulator)

        # Threshold 1.0 with an unfit individual -> failure recorded
        self.config.success_threshold = 1.0
        self.config.population_size = 1
        p = Population(self.samples, self.grammar, stats)
        p.generation[0] = Individual(self.samples, self.grammar, stats, '00000000000000000000000000000000')
        p.evolve()
        super().assertListEqual([True, False], stats.success_rate_accumulator)
200 |
201 |
class TestSelection(BasePopulationTest):
    """ Unit Test class for GE Selection object """

    def test_dispatch(self):
        """ Dispatcher method provides the proper selection method """
        # Each known selection type dispatches to its own implementation;
        # an unknown type (None) falls back to binary tournament
        dispatch_cases = (
            (SelectionType.BINARY_TOURNAMENT, Selection._binary_tournament),
            (SelectionType.K_TOURNAMENT, Selection._k_tournament),
            (None, Selection._binary_tournament),
        )

        for selection_type, expected_method in dispatch_cases:
            super().assertIs(Selection(selection_type)._select, expected_method)
216 |
217 |
class TestRecombination(BasePopulationTest):
    """ Unit Test class for GE Recombination object """

    def test_dispatch(self):
        """ Dispatcher method provides the proper recombine method """
        # Only one recombination strategy exists, so the dispatcher must
        # always resolve to random one point crossover
        recombination_instance = Recombination(self.grammar, self.samples, self.stats)
        super().assertEqual(
            recombination_instance._random_one_point_crossover, recombination_instance._recombine)
225 |
226 |
class TestReplacement(BasePopulationTest):
    """ Unit Test class for GE Replacement object """

    def test_dispatch(self):
        """ Dispatcher method provides the proper replacement method """
        # Each known replacement type dispatches to its own implementation;
        # an unknown type (None) falls back to mu plus lambda
        dispatch_cases = (
            (ReplacementType.MU_PLUS_LAMBDA, Replacement._mu_plus_lambda),
            (ReplacementType.MU_LAMBDA_WITH_ELITISM, Replacement._mu_lambda_elite),
            (ReplacementType.MU_LAMBDA_WITHOUT_ELITISM, Replacement._mu_lambda_no_elite),
            (None, Replacement._mu_plus_lambda),
        )

        for replacement_type, expected_method in dispatch_cases:
            super().assertIs(Replacement(replacement_type)._replace, expected_method)
244 |
245 |
# Allow running this test module directly (e.g. `python tests/test_population.py`)
if __name__ == "__main__":
    unittest.main()
248 |
--------------------------------------------------------------------------------
/tests/test_script.py:
--------------------------------------------------------------------------------
1 | """ Unit testing file for CLI module
2 |
3 | This file is part of PatternOmatic.
4 |
5 | Copyright © 2020 Miguel Revuelta Espinosa
6 |
7 | PatternOmatic is free software: you can redistribute it and/or
8 | modify it under the terms of the GNU Lesser General Public License
9 | as published by the Free Software Foundation, either version 3 of
10 | the License, or (at your option) any later version.
11 |
12 | PatternOmatic is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with PatternOmatic. If not, see .
19 |
20 | """
import os
import subprocess
import sys

import scripts.patternomatic as pom

from unittest import TestCase, mock
from spacy import load as spacy_load
from PatternOmatic.settings.log import LOG
27 |
28 |
class TestPatternomaticScript(TestCase):
    """ Test class to verify patternomatic.py correct behaviour """

    nlp = spacy_load('en_core_web_sm')

    samples = [nlp(u'My shirt is white'),
               nlp(u'My cat is black'),
               nlp(u'Your home is comfortable'),
               nlp(u'Their attitude is great')]

    # Repository-root config.ini, resolved relative to this test file
    config_file_path = os.path.join(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir), 'config.ini')

    # A complete, valid CLI invocation: samples, config file and language model
    full_args = ['-s', 'Hello', '-s', 'Goodbye', '-c', config_file_path, '-l', 'en_core_web_sm']

    def test_main(self):
        """ Checks that main method works """
        with super().assertLogs(LOG) as cm:
            pom.main(self.full_args)
        super().assertIn('INFO:PatternOmatic:Best individuals for this execution:', cm.output)

    def test_main_errors_raised(self):
        """ Checks that main raises errors when bad arguments are supplied """
        # No args
        with super().assertRaises(SystemExit):
            pom.main([])

        # Wrong args
        with super().assertRaises(SystemExit):
            pom.main(['-k'])

        # Wrong lang: main must warn and fall back to the default model
        with super().assertLogs(LOG) as cm:
            bad_model = 'bad_model'
            args = self.full_args.copy()[:-1]
            args.append(bad_model)
            pom.main(args)
        super().assertEqual(f'WARNING:PatternOmatic:Model {bad_model} not found, falling back to '
                            f'patternOmatic\'s default language model: en_core_web_sm', cm.output[2])

        # Fatal error
        # NOTE(review): return_value is set to an Exception *instance*, so main
        # fails when it tries to use the "parser" — presumably the intended way
        # to force the fatal-error path; confirm against scripts/patternomatic.py
        with mock.patch('scripts.patternomatic.ArgumentParser') as mock_arg_parser:
            mock_arg_parser.return_value = Exception('Mocked exception')

            with super().assertRaises(Exception):
                pom.main(self.full_args)

    def test_patternomatic_script(self):
        """ Checks that patternomatic can be run as a script properly """
        script_path = os.path.join(
            os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir), 'scripts', 'patternomatic.py')

        # Run with the interpreter executing this test suite: a bare "python"
        # on PATH may be absent or point at an unrelated installation.
        # Argument-list form (no shell) also keeps the path safe if it
        # contains spaces.
        output_signal = subprocess.run(
            [sys.executable, script_path, '-s', 'Hello', '-s', 'Goodbye']).returncode
        super().assertEqual(0, output_signal)
82 |
--------------------------------------------------------------------------------
/tests/test_settings.py:
--------------------------------------------------------------------------------
1 | """ Unit testing module for settings module
2 |
3 | This file is part of PatternOmatic.
4 |
5 | Copyright © 2020 Miguel Revuelta Espinosa
6 |
7 | PatternOmatic is free software: you can redistribute it and/or
8 | modify it under the terms of the GNU Lesser General Public License
9 | as published by the Free Software Foundation, either version 3 of
10 | the License, or (at your option) any later version.
11 |
12 | PatternOmatic is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with PatternOmatic. If not, see .
19 |
20 | """
21 | import configparser
22 | import os
23 | import unittest
24 |
25 | from PatternOmatic.settings.config import Config, RecombinationType
26 |
27 |
class TestConfig(unittest.TestCase):
    """ Test class for settings """

    # Populated by setUp with the current singleton instance
    config = None

    def test_config_is_singleton(self):
        """ Tests config instance is a singleton one """
        another_config = Config()
        super().assertEqual(self.config, another_config)

    def test_config_is_clearable(self):
        """ Tests its possible to renew the singleton instance """
        Config.clear_instance()
        another_config = Config()

        super().assertNotEqual(self.config, another_config)

    def test_config_read_from_path(self):
        """ Tests providing or not providing a configuration file works as expected"""
        # No config file provided
        super().assertEqual(None, self.config.file_path)

        # Correct config file provided

        file_path = os.path.join(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir), 'config.ini')
        Config.clear_instance()
        self.config = Config(file_path)
        super().assertEqual(file_path, self.config.file_path)

        # Bad path provided: Config silently falls back to defaults (file_path None)
        Config.clear_instance()
        self.config = Config('')
        super().assertEqual(None, self.config.file_path)

    def test_xps_gop_can_not_be_enabled_together(self):
        """ Tests Spacy's Grammar Operators and Extended Patter Syntax can not be enabled both """
        # Enabling one while the other is on must switch the other off
        config = Config()
        config.use_grammar_operators = True
        config.use_extended_pattern_syntax = True
        super().assertNotEqual(config.use_grammar_operators, config.use_extended_pattern_syntax)

        config.use_grammar_operators = False
        config.use_extended_pattern_syntax = True
        super().assertEqual(True, config.use_extended_pattern_syntax)

        config.use_grammar_operators = True
        super().assertEqual(False, config.use_extended_pattern_syntax)

    def test_setting_config_attribute_with_wrong_type_has_no_effect(self):
        """ Tests that assigning a wrongly-typed value to a Config attribute is ignored """
        config = Config()

        # Each assignment below uses a type the attribute does not accept
        config.max_runs = 0.5
        config.use_extended_pattern_syntax = None
        config.fitness_function_type = RecombinationType.RANDOM_ONE_POINT_CROSSOVER
        config.report_path = 0

        super().assertNotEqual(config.max_runs, 0.5)
        super().assertNotEqual(config.use_extended_pattern_syntax, None)
        super().assertNotEqual(config.fitness_function_type, RecombinationType.RANDOM_ONE_POINT_CROSSOVER)
        super().assertNotEqual(config.report_path, 0)

    def test_validate_config_argument(self):
        """ Checks that config arguments are properly fetched according to its type """
        config_parser = configparser.ConfigParser()

        test_section = 'test_section'
        test_option_int = 'test_option_int'
        test_option_float = 'test_option_float'
        test_option_boolean = 'test_option_boolean'
        test_option_string = 'test_option_string'

        config_parser.add_section(test_section)

        config_parser[test_section][test_option_int] = '0'
        config_parser[test_section][test_option_float] = '0.0'
        config_parser[test_section][test_option_boolean] = 'False'
        config_parser[test_section][test_option_string] = ''

        # With valid types: the parsed value is returned, not the default
        super().assertEqual(
            0, self.config._validate_config_argument(test_section, test_option_int, 1, config_parser))
        super().assertEqual(
            .0, self.config._validate_config_argument(test_section, test_option_float, .1, config_parser))
        super().assertEqual(
            False, self.config._validate_config_argument(test_section, test_option_boolean, True, config_parser))
        super().assertEqual(
            '', self.config._validate_config_argument(test_section, test_option_string, 'Whatever', config_parser))

        # With wrong type: the default is returned instead
        config_parser[test_section][test_option_int] = 'False'
        super().assertEqual(
            1, self.config._validate_config_argument(test_section, test_option_int, 1, config_parser))

        # With not even a possible type used by the config parser
        super().assertEqual(
            {}, self.config._validate_config_argument(test_section, test_option_int, {}, config_parser))

    #
    # Helpers
    #
    def setUp(self) -> None:
        """ Fresh Config instance """
        # Config is a singleton; tearDown clears it between tests
        self.config = Config()

    def tearDown(self) -> None:
        """ Destroy Config instance """
        Config.clear_instance()
135 |
--------------------------------------------------------------------------------
/tests/test_stats.py:
--------------------------------------------------------------------------------
1 | """ Unit testing module for stats module
2 |
3 | This file is part of PatternOmatic.
4 |
5 | Copyright © 2020 Miguel Revuelta Espinosa
6 |
7 | PatternOmatic is free software: you can redistribute it and/or
8 | modify it under the terms of the GNU Lesser General Public License
9 | as published by the Free Software Foundation, either version 3 of
10 | the License, or (at your option) any later version.
11 |
12 | PatternOmatic is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with PatternOmatic. If not, see .
19 |
20 | """
21 | import os
22 | from unittest import TestCase, mock
23 |
24 | from PatternOmatic.ge.individual import Individual
25 | from PatternOmatic.ge.stats import Stats
26 | from PatternOmatic.settings.config import Config
27 | from PatternOmatic.settings.literals import ReportFormat
28 |
29 |
30 | class TestStats(TestCase):
31 | """ Tests for Stats class """
32 |
33 | stats = None
34 | test_report_path_file = 'test_report_path_file.txt'
35 | fitness_value_literal = 'fitness_value'
36 |
37 | def test_add_sr(self):
38 | """ SR accumulator works """
39 | self.stats.add_sr(True)
40 | super().assertListEqual([True], self.stats.success_rate_accumulator)
41 |
42 | def test_add_mbf(self):
43 | """ MBF accumulator works """
44 | self.stats.add_mbf(0.5)
45 | super().assertListEqual([0.5], self.stats.mbf_accumulator)
46 |
47 | def test_add_aes(self):
48 | """ AES accumulator works """
49 | self.stats.add_aes(10)
50 | super().assertListEqual([10], self.stats.aes_accumulator)
51 |
52 | def test_add_time(self):
53 | """ Time accumulator works """
54 | self.stats.add_time(0.2222)
55 | super().assertListEqual([0.2222], self.stats.time_accumulator)
56 |
57 | def test_add_most_fitted(self):
58 | """ Most fitted accumulator works """
59 | expected = object.__new__(Individual)
60 | expected.__setattr__(self.fitness_value_literal, 0.5)
61 |
62 | self.stats.add_most_fitted(expected)
63 | super().assertListEqual([expected], self.stats.most_fitted_accumulator)
64 |
65 | def test_sum_aes(self):
66 | """ Time counter works """
67 | self.stats.sum_aes(2)
68 | self.stats.sum_aes(2)
69 | super().assertEqual(4, self.stats.aes_counter,)
70 |
71 | def test_reset(self):
72 | """ Reset stats method works """
73 | self.stats.aes_counter = 100
74 | self.stats.solution_found = True
75 | self.stats.reset()
76 | super().assertEqual(0, self.stats.aes_counter)
77 | super().assertEqual(False, self.stats.solution_found)
78 |
79 | def test_calculate_metrics(self):
80 | """ Calculate metrics works """
81 | self.stats.success_rate_accumulator = [1, 1, 1]
82 | self.stats.mbf_accumulator = [2, 2, 2]
83 | self.stats.aes_counter = 100
84 | self.stats.time_accumulator = [3, 3, 3]
85 |
86 | self.stats.calculate_metrics()
87 |
88 | super().assertEqual(1, self.stats.success_rate)
89 | super().assertEqual(2, self.stats.mbf)
90 | super().assertEqual(100, self.stats.aes)
91 | super().assertEqual(3, self.stats.mean_time)
92 |
93 | def test_get_most_fitted(self):
94 | """ Most fitted individual is found on most fitted accumulator """
95 | i1 = object.__new__(Individual)
96 | i1.__setattr__(self.fitness_value_literal, 0.01)
97 | i2 = object.__new__(Individual)
98 | i2.__setattr__(self.fitness_value_literal, 0.1)
99 | i3 = object.__new__(Individual)
100 | i3.__setattr__(self.fitness_value_literal, 0.001)
101 |
102 | mock_individual_list = list()
103 |
104 | mock_individual_list.append(i1)
105 | mock_individual_list.append(i2)
106 | mock_individual_list.append(i3)
107 |
108 | self.stats.most_fitted_accumulator = mock_individual_list
109 |
110 | super().assertEqual(self.stats.get_most_fitted(), i2)
111 |
112 | def test_avg(self):
113 | """ Average implementation works """
114 | test_list_1 = [1, 2, 3]
115 | super().assertEqual(2, self.stats.avg(test_list_1))
116 |
117 | def test_dict_and_repr(self):
118 | """ Checks that Stats instances are properly represented """
119 | stats_dict = {
120 | 'success_rate': 1.0,
121 | 'mbf': 0.5,
122 | 'aes': 100,
123 | 'mean_time': 4.5,
124 | 'most_fitted': None
125 | }
126 |
127 | # Check that with no best individual representation is well formed
128 | stats = Stats()
129 | stats.success_rate = stats_dict['success_rate']
130 | stats.mbf = stats_dict['mbf']
131 | stats.aes = stats_dict['aes']
132 | stats.mean_time = stats_dict['mean_time']
133 |
134 | super().assertEqual(stats.__dict__, stats_dict)
135 | super().assertEqual(dict(stats), stats_dict)
136 | super().assertEqual(f'Stats({repr(stats_dict)})', repr(stats))
137 |
138 | # Check that with most fitted accumulator representation is well formed
139 | i = object.__new__(Individual)
140 | i.__setattr__(self.fitness_value_literal, 1.0)
141 |
142 | stats.most_fitted_accumulator = [i]
143 | stats_dict['most_fitted'] = i.__dict__
144 |
145 | super().assertDictEqual(stats_dict, stats.__dict__)
146 | super().assertEqual(stats_dict, dict(stats))
147 | super().assertEqual(f'Stats({repr(stats_dict)})', repr(stats))
148 |
149 | def test_persist(self):
150 | config = Config()
151 | config.report_format = ReportFormat.JSON
152 | config.report_path = self.test_report_path_file
153 |
154 | # When a best individual has been found
155 | i = object.__new__(Individual)
156 | i.__setattr__(self.fitness_value_literal, 1.0)
157 | self.stats.aes = 100
158 | self.stats.mbf = 0.9
159 | self.stats.mean_time = 0.42
160 | self.stats.success_rate = 1.0
161 | self.stats.most_fitted_accumulator = [i]
162 | self.stats.persist()
163 |
164 | with open(self.test_report_path_file, 'r') as persisted_report:
165 | red_report = persisted_report.readlines()
166 |
167 | super().assertEqual(str(dict(self.stats)) + '\n', red_report[0])
168 |
169 | # When a best individual has not been found
170 | self.stats.most_fitted_accumulator = []
171 | self.stats.persist()
172 |
173 | with open(self.test_report_path_file, 'r') as persisted_report:
174 | red_report = persisted_report.readlines()
175 |
176 | super().assertEqual(str(dict(self.stats)) + '\n', red_report[1])
177 |
178 | def test_to_csv(self):
179 | """ Test stats instance dict to csv conversion """
180 | with mock.patch('PatternOmatic.ge.stats.time') as mock_time:
181 | mock_time.return_value = .123
182 | self.stats.aes = 10
183 | self.stats.mbf = 0.5
184 | self.stats.mean_time = 0.22
185 | self.stats.success_rate = 0.5
186 |
187 | # When a best individual has not been found
188 | csv_stats = \
189 | f'{.123}\t{self.stats.mbf}\t{self.stats.success_rate}\t{self.stats.aes}\t{self.stats.mean_time}\t' \
190 | f'{None}\t'
191 |
192 | super().assertEqual(csv_stats, self.stats._to_csv())
193 |
194 | # When a best individual has been found
195 | i = object.__new__(Individual)
196 | i.__setattr__(self.fitness_value_literal, 1.0)
197 | self.stats.most_fitted_accumulator = [i]
198 |
199 | csv_stats += f'{None}\t{i.fitness_value}\t'
200 | super().assertEqual(csv_stats, self.stats._to_csv())
201 |
202 | # Also check csv is correctly persisted
203 | config = Config()
204 | config.report_path = self.test_report_path_file
205 | config.report_format = ReportFormat.CSV
206 | self.stats.persist()
207 |
208 | with open(self.test_report_path_file, 'r') as persisted_report:
209 | red_report = persisted_report.readlines()
210 |
211 | super().assertEqual(csv_stats + '\n', red_report[0])
212 |
213 | #
214 | # Helpers
215 | #
216 | def setUp(self) -> None:
217 | """ Fresh Stats instance """
218 | self.stats = Stats()
219 | if os.path.exists(self.test_report_path_file):
220 | os.remove(self.test_report_path_file)
221 |
222 | @classmethod
223 | def tearDownClass(cls) -> None:
224 | """ Remove temporary report file """
225 | if os.path.exists(cls.test_report_path_file):
226 | os.remove(cls.test_report_path_file)
227 |
--------------------------------------------------------------------------------