├── example.png ├── .gitattributes ├── requirements.txt ├── LICENSE ├── README.md ├── oruga2_gde3.py ├── comparison ├── wordnet │ ├── jaya.py │ ├── gde3.py │ ├── nsga2.py │ ├── pso.py │ ├── cs.py │ └── tlbo.py ├── word2vec │ ├── jaya.py │ ├── gde3.py │ ├── nsga2.py │ ├── pso.py │ ├── cs.py │ └── tlbo.py └── web │ ├── jaya.py │ ├── gde3.py │ ├── nsga2.py │ ├── pso.py │ ├── cs.py │ └── tlbo.py ├── oruga2_nsga2.py ├── oruga3_gde3_wmd.py ├── oruga3_nsga2_wmd.py ├── texts.txt ├── oruga_wordnet.py ├── oruga_word2vec.py ├── oruga_massive_experiments.py ├── oruga_webscraping.py └── oruga_massive_experiments_smog.py /example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jorge-martinez-gil/oruga/HEAD/example.png -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | gensim==4.2.0 2 | jmetalpy==1.5.5 3 | language_tool_python==2.7.1 4 | nltk==3.6.5 5 | pygad==2.1.0 6 | py-readability-metrics==1.4.5 7 | Requests==2.28.1 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 jorge-martinez-gil 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to 
whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ORUGA: Optimizing Readability Using Genetic Algorithms 2 | 3 | [![DOI](https://img.shields.io/badge/DOI-10.1016%2Fj.knosys.2023.111273-blue.svg)](https://doi.org/10.1016/j.knosys.2023.111273) 4 | [![Journal](https://img.shields.io/badge/Journal-Knowledge--Based_Systems-orange.svg)](https://www.sciencedirect.com/journal/knowledge-based-systems) 5 | [![PyGAD Version](https://img.shields.io/badge/PyGAD-2.1.0-red.svg)](https://pypi.org/project/pygad/2.1.0/) 6 | [![License](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE) 7 | 8 | > **Official implementation of the paper published in *Knowledge-Based Systems* (2024).** 9 | 10 | **ORUGA** (**O**ptimizing **R**eadability **U**sing **G**enetic **A**lgorithms) is an unsupervised framework designed to automatically enhance the readability of text. Unlike deep learning approaches that require massive training datasets, ORUGA uses evolutionary strategies (Genetic Algorithms) to minimize complexity metrics (like FKGL) while preserving semantic meaning. 
11 | 12 | ![ORUGA Process Example](example.png) 13 | 14 | ## 📄 Citation 15 | If you utilize this framework or code in your research, **please cite the following paper**: 16 | 17 | ```bibtex 18 | @article{martinez2024oruga, 19 | author = {Jorge Martinez-Gil}, 20 | title = {Optimizing readability using genetic algorithms}, 21 | journal = {Knowledge-Based Systems}, 22 | volume = {284}, 23 | pages = {111273}, 24 | year = {2024}, 25 | issn = {0950-7051}, 26 | doi = {10.1016/j.knosys.2023.111273} 27 | } 28 | ```` 29 | 30 | ## 📚 Tutorials & Context 31 | 32 | For a general audience overview of the concepts behind this framework, refer to this three-part series on Medium: 33 | 34 | * [Part 1: Introduction to Readability Optimization](https://medium.com/@jorgemarcc/readability-optimization-in-python-1-3-4491a5216cf0) 35 | * [Part 2: Implementation Details](https://medium.com/@jorgemarcc/readabilty-optimization-in-python-2-3-39a4bc4e98e) 36 | * [Part 3: Advanced Optimization Strategies](https://medium.com/@jorgemarcc/readability-optimization-in-python-3-3-7cbe204cafef) 37 | 38 | ## ⚙️ Installation 39 | 40 | To reproduce the experiments, install the dependencies: 41 | 42 | ```bash 43 | pip install -r requirements.txt 44 | ``` 45 | 46 | > [\!WARNING] 47 | > **CRITICAL DEPENDENCY CONFLICT** 48 | > 49 | > 1. **Package Name:** Ensure you use `pygad==2.1.0`. Newer versions may cause compatibility errors with the evolutionary logic. 50 | > 2. **Namespace Conflict:** There is a known namespace collision between `Readability` and `readability-lxml`. 51 | > * This project uses `py-readability-metrics`. 52 | > * **Do not** install `readability-lxml` in the same environment, or the imports will fail. 53 | 54 | ## 🧪 Experimental Reproduction 55 | 56 | The repository allows you to reproduce the single-objective and multi-objective evolutionary experiments reported in the paper. 57 | 58 | ### 1\. 
Single-Objective Optimization 59 | 60 | These scripts focus solely on minimizing the **FKGL (Flesch-Kincaid Grade Level)** score using different synonym replacement strategies. 61 | 62 | | Script | Strategy | Description | 63 | | :--- | :--- | :--- | 64 | | `oruga_wordnet.py` | **WordNet** | Uses the NLTK WordNet lexical database for synonym retrieval. Fast and standard. | 65 | | `oruga_word2vec.py` | **Word2Vec** | Uses vector embeddings to find synonyms. *Note: Slower execution due to vector operations.* | 66 | | `oruga_webscraping.py` | **Web** | Scrapes external thesaurus sites. *Note: Please use responsibly to avoid rate limiting.* | 67 | 68 | ### 2\. Multi-Objective Optimization (NSGA-II & GDE3) 69 | 70 | These scripts implement the advanced contributions of the paper, simultaneously minimizing **Readability Score (FKGL)** and **Text modification rate**, preventing the algorithm from changing too many words (Semantic Drift). 71 | 72 | **Using NSGA-II (Non-dominated Sorting Genetic Algorithm II):** 73 | 74 | ```bash 75 | # Basic Semantic Protection 76 | python oruga2_nsga2.py 77 | 78 | # Advanced Semantic Protection (using Word Mover's Distance - WMD) 79 | python oruga2_nsga2_wmd.py 80 | ``` 81 | 82 | **Using GDE3 (Generalized Differential Evolution 3):** 83 | 84 | ```bash 85 | # Basic Semantic Protection 86 | python oruga2_gde3.py 87 | 88 | # Advanced Semantic Protection (using Word Mover's Distance - WMD) 89 | python oruga2_gde3_wmd.py 90 | ``` 91 | 92 | ## 📊 Dataset 93 | 94 | The repository includes `texts.txt`, which contains the benchmarking dataset used in the study: 95 | 96 | * **Content:** 10 text samples extracted from Wikipedia. 97 | * **Diversity:** Varies in length, topic, and initial complexity levels to test the robustness of the algorithm. 98 | 99 | ## 📄 License 100 | 101 | This project is licensed under the **MIT License**. 
102 | -------------------------------------------------------------------------------- /oruga2_gde3.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ORUGA: Optimizing Readability Using Genetic Algorithms 4 | 5 | [Martinez-Gil2023a] J. Martinez-Gil, "Optimizing Readability Using Genetic Algorithms", arXiv preprint arXiv:2301.00374, 2023 6 | 7 | @author: Jorge Martinez-Gil 8 | """ 9 | 10 | from jmetal.algorithm.multiobjective.gde3 import GDE3 11 | from jmetal.util.termination_criterion import StoppingByEvaluations 12 | from readability import Readability 13 | from nltk.corpus import wordnet 14 | from jmetal.core.problem import FloatProblem 15 | from jmetal.core.solution import FloatSolution 16 | 17 | 18 | def listToString(s): 19 | str1 = "" 20 | for ele in s: 21 | str1 += str(ele) 22 | str1 += " " 23 | 24 | str1 = str1.replace(' ,', ',') 25 | str1 = str1.replace('_', ' ') 26 | return str1 27 | 28 | def Synonym(word, number): 29 | synonyms = [] 30 | for syn in wordnet.synsets(word): 31 | for lm in syn.lemmas(): 32 | synonyms.append(lm.name()) 33 | 34 | if (not synonyms): 35 | return -2, word 36 | elif number >= len(synonyms): 37 | return len(synonyms)-1, synonyms[len(synonyms)-1] 38 | else: 39 | return int(number), synonyms[int(number-1)] 40 | 41 | def fitness_func1(solution): 42 | #preprocessing 43 | a = 0 44 | for i in index_array: 45 | if index_array[a] <= 0: 46 | solution[a] = 0 47 | a += 1 48 | 49 | res2 = text.split() 50 | text_converted = [] 51 | index=0 52 | for i in res2: 53 | if solution[index] < 1: 54 | text_converted.append (i) 55 | elif solution[index] >= 1: 56 | number, word = Synonym(i,solution[index]) 57 | text_converted.append (word) 58 | else: 59 | print ('Error') 60 | index += 1 61 | 62 | result = listToString(text_converted) 63 | r = Readability(result) 64 | return r.ari().score 65 | 66 | text = 'Real Madrid Club de Futbol, meaning Royal Madrid Football Club, commonly 
referred to as Real Madrid, is a Spanish professional football club based in Madrid. Founded in 1902 as Madrid Football Club, the club has traditionally worn a white home kit since its inception. The honorific title real is Spanish for Royal and was bestowed to the club by King Alfonso XIII in 1920 together with the royal crown in the emblem. Real Madrid have played their home matches in the Santiago Bernabeu Stadium in downtown Madrid since 1947. Unlike most European sporting entities, Real Madrid members (socios) have owned and operated the club throughout its history.' 67 | 68 | text_array = [] 69 | index_array = [] 70 | 71 | res = text.split() 72 | for i in res: 73 | flag = 0 74 | if ',' in i: 75 | i = i.replace(',', '') 76 | flag = 1 77 | if '.' in i: 78 | i = i.replace('.', '') 79 | flag = 2 80 | 81 | if (not i[0].isupper() and len(i) > 3): 82 | number, word = Synonym(i,6) 83 | text_array.append (word) 84 | index_array.append (number) 85 | else: 86 | text_array.append (i) 87 | index_array.append (0) 88 | 89 | if flag == 1: 90 | cad = text_array[-1] 91 | text_array.pop() 92 | cad = cad + str(',') 93 | text_array.append (cad) 94 | flag = 0 95 | if flag == 2: 96 | cad = text_array[-1] 97 | text_array.pop() 98 | cad = cad + str('.') 99 | text_array.append (cad) 100 | flag = 0 101 | 102 | def obtain_text (solution): 103 | res2 = text.split() 104 | text_converted = [] 105 | index=0 106 | for i in res2: 107 | if solution[index] < 1: 108 | text_converted.append (i) 109 | elif solution[index] >= 1: 110 | number, word = Synonym(i,solution[index]) 111 | text_converted.append (word.upper()) 112 | else: 113 | print ('Error') 114 | index += 1 115 | 116 | result = listToString(text_converted) 117 | return result 118 | 119 | 120 | class Oruga(FloatProblem): 121 | 122 | def __init__(self): 123 | super(Oruga, self).__init__() 124 | self.number_of_objectives = 2 125 | self.number_of_variables = len(index_array) 126 | self.number_of_constraints = 0 127 | 128 | 
self.obj_directions = [self.MINIMIZE, self.MINIMIZE] 129 | self.obj_labels = ['f(x)', 'f(y)'] 130 | 131 | self.lower_bound = self.number_of_variables * [-4] 132 | self.upper_bound = self.number_of_variables * [4] 133 | 134 | FloatSolution.lower_bound = self.lower_bound 135 | FloatSolution.upper_bound = self.upper_bound 136 | 137 | def evaluate(self, solution: FloatSolution) -> FloatSolution: 138 | 139 | solution.objectives[1] = fitness_func1(solution.variables) 140 | solution.objectives[0] = len([1 for i in solution.variables if i >= 1]) 141 | 142 | return solution 143 | 144 | 145 | def get_name(self): 146 | return 'Oruga' 147 | 148 | max_evaluations = 3000 149 | problem = Oruga() 150 | algorithm = GDE3( 151 | problem=problem, 152 | population_size=100, 153 | cr=0.5, 154 | f=0.5, 155 | termination_criterion=StoppingByEvaluations(max_evaluations) 156 | ) 157 | 158 | algorithm.run() 159 | 160 | from jmetal.util.solution import get_non_dominated_solutions, print_function_values_to_file, print_variables_to_file 161 | from jmetal.lab.visualization import Plot 162 | 163 | front = get_non_dominated_solutions(algorithm.get_result()) 164 | 165 | 166 | # save to files 167 | print_function_values_to_file(front, 'FUN.GDE3') 168 | print_variables_to_file(front, 'VAR.GDE3') 169 | plot_front = Plot(title='ORUGA', axis_labels=['Words to be replaced', 'Readability Score']) 170 | plot_front.plot(front, label='GDE3', filename='GDE3-ORUGA', format='png') 171 | 172 | for solution in front: 173 | # We should call here a function to try to correct the text 174 | print (obtain_text(solution.variables)) -------------------------------------------------------------------------------- /comparison/wordnet/jaya.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from readability import Readability 3 | from nltk.corpus import wordnet 4 | import gensim.downloader as api 5 | 6 | model = api.load('word2vec-google-news-300') 7 | 8 | 9 | 
def calculate_hypervolume(pareto_front, ref_point): 10 | sorted_front = sorted(pareto_front, key=lambda x: x[0]) 11 | hypervolume = 0.0 12 | prev_point = [0.0, ref_point[1]] 13 | 14 | for point in sorted_front: 15 | if point[1] < prev_point[1]: 16 | hypervolume += (prev_point[0] - point[0]) * (prev_point[1] - ref_point[1]) 17 | prev_point = point 18 | 19 | hypervolume += (prev_point[0] - ref_point[0]) * (prev_point[1] - ref_point[1]) 20 | return hypervolume 21 | 22 | def listToString(s): 23 | str1 = "" 24 | for ele in s: 25 | str1 += str(ele) 26 | str1 += " " 27 | 28 | str1 = str1.replace(' ,', ',') 29 | str1 = str1.replace('_', ' ') 30 | return str1 31 | 32 | def Synonym(word, number): 33 | synonyms = [] 34 | for syn in wordnet.synsets(word): 35 | for lm in syn.lemmas(): 36 | synonyms.append(lm.name()) 37 | 38 | if (not synonyms): 39 | return -2, word 40 | elif number >= len(synonyms): 41 | return len(synonyms)-1, synonyms[len(synonyms)-1] 42 | else: 43 | return int(number), synonyms[int(number-1)] 44 | 45 | def fitness_func1(solution): 46 | print (solution) 47 | 48 | #preprocessing 49 | a = 0 50 | for i in index_array: 51 | if index_array[a] <= 0: 52 | solution[a] = 0 53 | a += 1 54 | 55 | res2 = text.split() 56 | text_converted = [] 57 | index=0 58 | for i in res2: 59 | if solution[index] < 1: 60 | text_converted.append (i) 61 | elif solution[index] >= 1: 62 | number, word = Synonym(i,solution[index]) 63 | text_converted.append (word) 64 | else: 65 | print ('Error') 66 | index += 1 67 | 68 | result = listToString(text_converted) 69 | r = Readability(result) 70 | return r.ari().score 71 | 72 | text = 'The sea moderates the climate and has important roles in the water cycle, carbon cycle, and nitrogen cycle. Humans harnessing and studying the sea have been recorded since ancient times, and evidenced well into prehistory, while its modern scientific study is called oceanography. The most abundant solid dissolved in seawater is sodium chloride. 
The water also contains salts of magnesium, calcium, potassium, and mercury, amongst many other elements, some in minute concentrations. Salinity varies widely, being lower near the surface and the mouths of large rivers and higher in the depths of the ocean; however, the relative proportions of dissolved salts vary little across the oceans.' 73 | 74 | text_array = [] 75 | index_array = [] 76 | 77 | res = text.split() 78 | for i in res: 79 | flag = 0 80 | if ',' in i: 81 | i = i.replace(',', '') 82 | flag = 1 83 | if '.' in i: 84 | i = i.replace('.', '') 85 | flag = 2 86 | 87 | if (not i[0].isupper() and len(i) > 3): 88 | number, word = Synonym(i,6) 89 | text_array.append (word) 90 | index_array.append (number) 91 | else: 92 | text_array.append (i) 93 | index_array.append (0) 94 | 95 | if flag == 1: 96 | cad = text_array[-1] 97 | text_array.pop() 98 | cad = cad + str(',') 99 | text_array.append (cad) 100 | flag = 0 101 | if flag == 2: 102 | cad = text_array[-1] 103 | text_array.pop() 104 | cad = cad + str('.') 105 | text_array.append (cad) 106 | flag = 0 107 | 108 | def obtain_text (solution): 109 | res2 = text.split() 110 | text_converted = [] 111 | index=0 112 | for i in res2: 113 | if solution[index] < 1: 114 | text_converted.append (i) 115 | elif solution[index] >= 1: 116 | number, word = Synonym(i,solution[index]) 117 | text_converted.append (word.upper()) 118 | else: 119 | print ('Error') 120 | index += 1 121 | 122 | result = listToString(text_converted) 123 | return result 124 | 125 | 126 | # Multi-objective function to be minimized 127 | def multi_objective(x): 128 | 129 | source = text 130 | target = obtain_text(x) 131 | 132 | return len([1 for i in x if i >= 1]), fitness_func1(x), float (model.wmdistance(source, target)) 133 | 134 | def jaya_multi_objective(pop_size, num_iterations, num_variables, lower_bound, upper_bound): 135 | population = np.random.uniform(lower_bound, upper_bound, (pop_size, num_variables)) 136 | 137 | for _ in range(num_iterations): 
138 | new_population = population.copy() 139 | 140 | for i in range(pop_size): 141 | for j in range(num_variables): 142 | rand_idx = np.random.randint(pop_size) 143 | new_value = population[i, j] + np.random.uniform(-1, 1) * (population[rand_idx, j] - population[i, j]) 144 | new_value = np.clip(new_value, lower_bound, upper_bound) 145 | new_population[i, j] = new_value 146 | 147 | population = new_population 148 | 149 | # Calculate the objective values for each individual in the final population 150 | objective_values = np.array([multi_objective(individual) for individual in population]) 151 | return population, objective_values 152 | 153 | pop_size = 20 154 | num_iterations = 50 155 | num_variables = len(index_array) 156 | lower_bound = -4 157 | upper_bound = 4 158 | 159 | final_population, final_objective_values = jaya_multi_objective(pop_size, num_iterations, num_variables, lower_bound, upper_bound) 160 | 161 | print("Final Population:") 162 | print(final_population) 163 | 164 | print("\nFinal Objective Values:") 165 | print(final_objective_values) 166 | 167 | ref_point = [60.0, 20.0, 1.0] 168 | hypervolume = calculate_hypervolume(final_objective_values, ref_point) 169 | print("Hypervolume:", hypervolume) -------------------------------------------------------------------------------- /oruga2_nsga2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ORUGA: Optimizing Readability Using Genetic Algorithms 4 | 5 | [Martinez-Gil2023a] J. 
Martinez-Gil, "Optimizing Readability Using Genetic Algorithms", arXiv preprint arXiv:2301.00374, 2023 6 | 7 | @author: Jorge Martinez-Gil 8 | """ 9 | 10 | from jmetal.algorithm.multiobjective import NSGAII 11 | from jmetal.operator import SBXCrossover, PolynomialMutation 12 | from jmetal.util.termination_criterion import StoppingByEvaluations 13 | from readability import Readability 14 | from nltk.corpus import wordnet 15 | from jmetal.core.problem import FloatProblem 16 | from jmetal.core.solution import FloatSolution 17 | 18 | 19 | def listToString(s): 20 | str1 = "" 21 | for ele in s: 22 | str1 += str(ele) 23 | str1 += " " 24 | 25 | str1 = str1.replace(' ,', ',') 26 | str1 = str1.replace('_', ' ') 27 | return str1 28 | 29 | def Synonym(word, number): 30 | synonyms = [] 31 | for syn in wordnet.synsets(word): 32 | for lm in syn.lemmas(): 33 | synonyms.append(lm.name()) 34 | 35 | if (not synonyms): 36 | return -2, word 37 | elif number >= len(synonyms): 38 | return len(synonyms)-1, synonyms[len(synonyms)-1] 39 | else: 40 | return int(number), synonyms[int(number-1)] 41 | 42 | def fitness_func1(solution): 43 | #preprocessing 44 | a = 0 45 | for i in index_array: 46 | if index_array[a] <= 0: 47 | solution[a] = 0 48 | a += 1 49 | 50 | res2 = text.split() 51 | text_converted = [] 52 | index=0 53 | for i in res2: 54 | if solution[index] < 1: 55 | text_converted.append (i) 56 | elif solution[index] >= 1: 57 | number, word = Synonym(i,solution[index]) 58 | text_converted.append (word) 59 | else: 60 | print ('Error') 61 | index += 1 62 | 63 | result = listToString(text_converted) 64 | r = Readability(result) 65 | return r.flesch_kincaid().score 66 | 67 | text = 'Real Madrid Club de Futbol, meaning Royal Madrid Football Club, commonly referred to as Real Madrid, is a Spanish professional football club based in Madrid. Founded in 1902 as Madrid Football Club, the club has traditionally worn a white home kit since its inception. 
The honorific title real is Spanish for Royal and was bestowed to the club by King Alfonso XIII in 1920 together with the royal crown in the emblem. Real Madrid have played their home matches in the Santiago Bernabeu Stadium in downtown Madrid since 1947. Unlike most European sporting entities, Real Madrid members (socios) have owned and operated the club throughout its history.' 68 | 69 | text_array = [] 70 | index_array = [] 71 | 72 | res = text.split() 73 | for i in res: 74 | flag = 0 75 | if ',' in i: 76 | i = i.replace(',', '') 77 | flag = 1 78 | if '.' in i: 79 | i = i.replace('.', '') 80 | flag = 2 81 | 82 | if (not i[0].isupper() and len(i) > 3): 83 | number, word = Synonym(i,6) 84 | text_array.append (word) 85 | index_array.append (number) 86 | else: 87 | text_array.append (i) 88 | index_array.append (0) 89 | 90 | if flag == 1: 91 | cad = text_array[-1] 92 | text_array.pop() 93 | cad = cad + str(',') 94 | text_array.append (cad) 95 | flag = 0 96 | if flag == 2: 97 | cad = text_array[-1] 98 | text_array.pop() 99 | cad = cad + str('.') 100 | text_array.append (cad) 101 | flag = 0 102 | 103 | def obtain_text (solution): 104 | res2 = text.split() 105 | text_converted = [] 106 | index=0 107 | for i in res2: 108 | if solution[index] < 1: 109 | text_converted.append (i) 110 | elif solution[index] >= 1: 111 | number, word = Synonym(i,solution[index]) 112 | text_converted.append (word.upper()) 113 | else: 114 | print ('Error') 115 | index += 1 116 | 117 | result = listToString(text_converted) 118 | return result 119 | 120 | 121 | class Oruga(FloatProblem): 122 | 123 | def __init__(self): 124 | super(Oruga, self).__init__() 125 | self.number_of_objectives = 2 126 | self.number_of_variables = len(index_array) 127 | self.number_of_constraints = 0 128 | 129 | self.obj_directions = [self.MINIMIZE, self.MINIMIZE] 130 | self.obj_labels = ['f(x)', 'f(y)'] 131 | 132 | self.lower_bound = self.number_of_variables * [-4] 133 | self.upper_bound = self.number_of_variables * [4] 
134 | 135 | FloatSolution.lower_bound = self.lower_bound 136 | FloatSolution.upper_bound = self.upper_bound 137 | 138 | def evaluate(self, solution: FloatSolution) -> FloatSolution: 139 | 140 | solution.objectives[1] = fitness_func1(solution.variables) 141 | solution.objectives[0] = len([1 for i in solution.variables if i >= 1]) 142 | 143 | return solution 144 | 145 | 146 | def get_name(self): 147 | return 'Oruga' 148 | 149 | problem = Oruga() 150 | algorithm = NSGAII( 151 | problem=problem, 152 | population_size=20, 153 | offspring_population_size=30, 154 | mutation=PolynomialMutation(probability=1.0 / problem.number_of_variables, distribution_index=20), 155 | crossover=SBXCrossover(probability=1.0, distribution_index=20), 156 | termination_criterion=StoppingByEvaluations(max_evaluations=800) 157 | ) 158 | 159 | algorithm.run() 160 | 161 | from jmetal.util.solution import get_non_dominated_solutions, print_function_values_to_file, print_variables_to_file 162 | from jmetal.lab.visualization import Plot 163 | 164 | front = get_non_dominated_solutions(algorithm.get_result()) 165 | 166 | 167 | # save to files 168 | print_function_values_to_file(front, 'FUN.NSGAII') 169 | print_variables_to_file(front, 'VAR.NSGAII') 170 | plot_front = Plot(title='ORUGA', axis_labels=['Words to be replaced', 'Readability Score']) 171 | plot_front.plot(front, label='NSGA-II', filename='NSGAII-ORUGA', format='png') 172 | 173 | for solution in front: 174 | # We should call here a function to try to correct the text 175 | print (obtain_text(solution.variables)) -------------------------------------------------------------------------------- /oruga3_gde3_wmd.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ORUGA: Optimizing Readability Using Genetic Algorithms 4 | 5 | [Martinez-Gil2023a] J. 
Martinez-Gil, "Optimizing Readability Using Genetic Algorithms", arXiv preprint arXiv:2301.00374, 2023 6 | 7 | @author: Jorge Martinez-Gil 8 | """ 9 | 10 | from jmetal.algorithm.multiobjective.gde3 import GDE3 11 | from jmetal.util.termination_criterion import StoppingByEvaluations 12 | from readability import Readability 13 | from nltk.corpus import wordnet 14 | from jmetal.core.problem import FloatProblem 15 | from jmetal.core.solution import FloatSolution 16 | import gensim.downloader as api 17 | model = api.load('word2vec-google-news-300') 18 | 19 | def listToString(s): 20 | str1 = "" 21 | for ele in s: 22 | str1 += str(ele) 23 | str1 += " " 24 | 25 | str1 = str1.replace(' ,', ',') 26 | str1 = str1.replace('_', ' ') 27 | return str1 28 | 29 | def Synonym(word, number): 30 | synonyms = [] 31 | for syn in wordnet.synsets(word): 32 | for lm in syn.lemmas(): 33 | synonyms.append(lm.name()) 34 | 35 | if (not synonyms): 36 | return -2, word 37 | elif number >= len(synonyms): 38 | return len(synonyms)-1, synonyms[len(synonyms)-1] 39 | else: 40 | return int(number), synonyms[int(number-1)] 41 | 42 | def fitness_func1(solution): 43 | #preprocessing 44 | a = 0 45 | for i in index_array: 46 | if index_array[a] <= 0: 47 | solution[a] = 0 48 | a += 1 49 | 50 | res2 = text.split() 51 | text_converted = [] 52 | index=0 53 | for i in res2: 54 | if solution[index] < 1: 55 | text_converted.append (i) 56 | elif solution[index] >= 1: 57 | number, word = Synonym(i,solution[index]) 58 | text_converted.append (word) 59 | else: 60 | print ('Error') 61 | index += 1 62 | 63 | result = listToString(text_converted) 64 | r = Readability(result) 65 | return r.ari().score 66 | 67 | text = 'The sea moderates the climate and has important roles in the water cycle, carbon cycle, and nitrogen cycle. Humans harnessing and studying the sea have been recorded since ancient times, and evidenced well into prehistory, while its modern scientific study is called oceanography. 
The most abundant solid dissolved in seawater is sodium chloride. The water also contains salts of magnesium, calcium, potassium, and mercury, amongst many other elements, some in minute concentrations. Salinity varies widely, being lower near the surface and the mouths of large rivers and higher in the depths of the ocean; however, the relative proportions of dissolved salts vary little across the oceans.' 68 | 69 | text_array = [] 70 | index_array = [] 71 | 72 | res = text.split() 73 | for i in res: 74 | flag = 0 75 | if ',' in i: 76 | i = i.replace(',', '') 77 | flag = 1 78 | if '.' in i: 79 | i = i.replace('.', '') 80 | flag = 2 81 | 82 | if (not i[0].isupper() and len(i) > 3): 83 | number, word = Synonym(i,6) 84 | text_array.append (word) 85 | index_array.append (number) 86 | else: 87 | text_array.append (i) 88 | index_array.append (0) 89 | 90 | if flag == 1: 91 | cad = text_array[-1] 92 | text_array.pop() 93 | cad = cad + str(',') 94 | text_array.append (cad) 95 | flag = 0 96 | if flag == 2: 97 | cad = text_array[-1] 98 | text_array.pop() 99 | cad = cad + str('.') 100 | text_array.append (cad) 101 | flag = 0 102 | 103 | def obtain_text (solution): 104 | res2 = text.split() 105 | text_converted = [] 106 | index=0 107 | for i in res2: 108 | if solution[index] < 1: 109 | text_converted.append (i) 110 | elif solution[index] >= 1: 111 | number, word = Synonym(i,solution[index]) 112 | text_converted.append (word.upper()) 113 | else: 114 | print ('Error') 115 | index += 1 116 | 117 | result = listToString(text_converted) 118 | return result 119 | 120 | 121 | class Oruga(FloatProblem): 122 | 123 | def __init__(self): 124 | super(Oruga, self).__init__() 125 | self.number_of_objectives = 3 126 | self.number_of_variables = len(index_array) 127 | self.number_of_constraints = 0 128 | 129 | self.obj_directions = [self.MINIMIZE, self.MINIMIZE] 130 | self.obj_labels = ['f(x)', 'f(y)'] 131 | 132 | self.lower_bound = self.number_of_variables * [-4] 133 | self.upper_bound = 
self.number_of_variables * [4] 134 | 135 | FloatSolution.lower_bound = self.lower_bound 136 | FloatSolution.upper_bound = self.upper_bound 137 | 138 | def evaluate(self, solution: FloatSolution) -> FloatSolution: 139 | 140 | source = text 141 | target = obtain_text(solution.variables) 142 | 143 | solution.objectives[2] = float (model.wmdistance(source, target)) 144 | solution.objectives[1] = fitness_func1(solution.variables) 145 | solution.objectives[0] = len([1 for i in solution.variables if i >= 1]) 146 | 147 | return solution 148 | 149 | 150 | def get_name(self): 151 | return 'Oruga' 152 | 153 | max_evaluations = 3000 154 | problem = Oruga() 155 | algorithm = GDE3( 156 | problem=problem, 157 | population_size=100, 158 | cr=0.5, 159 | f=0.5, 160 | termination_criterion=StoppingByEvaluations(max_evaluations) 161 | ) 162 | 163 | algorithm.run() 164 | 165 | from jmetal.util.solution import get_non_dominated_solutions, print_function_values_to_file, print_variables_to_file 166 | from jmetal.lab.visualization import Plot 167 | 168 | front = get_non_dominated_solutions(algorithm.get_result()) 169 | 170 | 171 | # save to files 172 | print_function_values_to_file(front, 'FUN.GDE3') 173 | print_variables_to_file(front, 'VAR.GDE3') 174 | plot_front = Plot(title='ORUGA', axis_labels=['Words to be replaced', 'Readability Score']) 175 | plot_front.plot(front, label='GDE3', filename='GDE3-ORUGA', format='png') 176 | 177 | for solution in front: 178 | # We should call here a function to try to correct the text 179 | print (obtain_text(solution.variables)) -------------------------------------------------------------------------------- /oruga3_nsga2_wmd.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ORUGA: Optimizing Readability Using Genetic Algorithms 4 | 5 | [Martinez-Gil2023a] J. 
Martinez-Gil, "Optimizing Readability Using Genetic Algorithms", arXiv preprint arXiv:2301.00374, 2023 6 | 7 | @author: Jorge Martinez-Gil 8 | """ 9 | 10 | from jmetal.algorithm.multiobjective import NSGAII 11 | from jmetal.operator import SBXCrossover, PolynomialMutation 12 | from jmetal.util.termination_criterion import StoppingByEvaluations 13 | from readability import Readability 14 | from nltk.corpus import wordnet 15 | from jmetal.core.problem import FloatProblem 16 | from jmetal.core.solution import FloatSolution 17 | import gensim.downloader as api 18 | model = api.load('word2vec-google-news-300') 19 | 20 | 21 | def listToString(s): 22 | str1 = "" 23 | for ele in s: 24 | str1 += str(ele) 25 | str1 += " " 26 | 27 | str1 = str1.replace(' ,', ',') 28 | str1 = str1.replace('_', ' ') 29 | return str1 30 | 31 | def Synonym(word, number): 32 | synonyms = [] 33 | for syn in wordnet.synsets(word): 34 | for lm in syn.lemmas(): 35 | synonyms.append(lm.name()) 36 | 37 | if (not synonyms): 38 | return -2, word 39 | elif number >= len(synonyms): 40 | return len(synonyms)-1, synonyms[len(synonyms)-1] 41 | else: 42 | return int(number), synonyms[int(number-1)] 43 | 44 | def fitness_func1(solution): 45 | #preprocessing 46 | a = 0 47 | for i in index_array: 48 | if index_array[a] <= 0: 49 | solution[a] = 0 50 | a += 1 51 | 52 | res2 = text.split() 53 | text_converted = [] 54 | index=0 55 | for i in res2: 56 | if solution[index] < 1: 57 | text_converted.append (i) 58 | elif solution[index] >= 1: 59 | number, word = Synonym(i,solution[index]) 60 | text_converted.append (word) 61 | else: 62 | print ('Error') 63 | index += 1 64 | 65 | result = listToString(text_converted) 66 | r = Readability(result) 67 | return r.flesch_kincaid().score 68 | 69 | text = 'The sea moderates the climate and has important roles in the water cycle, carbon cycle, and nitrogen cycle. 
class Oruga(FloatProblem):
    """jMetalPy problem: rewrite *text* word-by-word to optimize readability.

    Three objectives, all minimized:
      f(x) - number of words replaced by a synonym,
      f(y) - Flesch-Kincaid readability score of the rewritten text,
      f(z) - Word Mover's Distance between the original and rewritten text.
    """

    def __init__(self):
        super(Oruga, self).__init__()
        self.number_of_objectives = 3
        self.number_of_variables = len(index_array)
        self.number_of_constraints = 0

        # BUG FIX: the problem declares 3 objectives, so it must expose
        # 3 directions and 3 labels (previously only 2 were listed).
        self.obj_directions = [self.MINIMIZE, self.MINIMIZE, self.MINIMIZE]
        self.obj_labels = ['f(x)', 'f(y)', 'f(z)']

        self.lower_bound = self.number_of_variables * [-4]
        self.upper_bound = self.number_of_variables * [4]

        # NOTE(review): these assignments patch the FloatSolution *class*
        # globally, not an instance — kept for compatibility; confirm this
        # is still required by the jmetalpy version in use.
        FloatSolution.lower_bound = self.lower_bound
        FloatSolution.upper_bound = self.upper_bound

    def evaluate(self, solution: FloatSolution) -> FloatSolution:
        """Fill in the three objective values for *solution* in place."""
        source = text
        target = obtain_text(solution.variables)

        solution.objectives[2] = float(model.wmdistance(source, target))
        # fitness_func1 zeroes genes without a usable synonym, so the
        # replacement count below must be taken AFTER it runs.
        solution.objectives[1] = fitness_func1(solution.variables)
        solution.objectives[0] = len([1 for i in solution.variables if i >= 1])

        return solution

    def get_name(self):
        return 'Oruga'
def calculate_hypervolume(pareto_front, ref_point):
    """Accumulate a 2-D hypervolume-style indicator for *pareto_front*.

    Only the first two coordinates of each point are used; any further
    objectives are ignored.  Points are swept in ascending order of their
    first coordinate, and a rectangle is added for every point that
    improves on the second coordinate of the previously accepted point.

    NOTE(review): the sweep anchors the first rectangle at x = 0.0 and
    measures heights against ref_point[1]; confirm this matches the
    intended hypervolume definition before comparing values across runs.
    """
    total = 0.0
    last = [0.0, ref_point[1]]
    for pt in sorted(pareto_front, key=lambda p: p[0]):
        if pt[1] < last[1]:
            total += (last[0] - pt[0]) * (last[1] - ref_point[1])
            last = pt
    # Close the sweep against the reference point.
    return total + (last[0] - ref_point[0]) * (last[1] - ref_point[1])
| index=0 66 | for i in res2: 67 | if solution[index] < 1: 68 | text_converted.append (i) 69 | elif solution[index] >= 1: 70 | number, word = Synonym(i,solution[index]) 71 | text_converted.append (word) 72 | else: 73 | print ('Error') 74 | index += 1 75 | 76 | result = listToString(text_converted) 77 | r = Readability(result) 78 | return r.ari().score 79 | 80 | text = 'The sea moderates the climate and has important roles in the water cycle, carbon cycle, and nitrogen cycle. Humans harnessing and studying the sea have been recorded since ancient times, and evidenced well into prehistory, while its modern scientific study is called oceanography. The most abundant solid dissolved in seawater is sodium chloride. The water also contains salts of magnesium, calcium, potassium, and mercury, amongst many other elements, some in minute concentrations. Salinity varies widely, being lower near the surface and the mouths of large rivers and higher in the depths of the ocean; however, the relative proportions of dissolved salts vary little across the oceans.' 81 | 82 | text_array = [] 83 | index_array = [] 84 | 85 | res = text.split() 86 | for i in res: 87 | flag = 0 88 | if ',' in i: 89 | i = i.replace(',', '') 90 | flag = 1 91 | if '.' 
class Oruga(FloatProblem):
    """jMetalPy problem: rewrite *text* word-by-word to optimize readability.

    Three objectives, all minimized:
      f(x) - number of words replaced by a synonym,
      f(y) - ARI readability score of the rewritten text,
      f(z) - Word Mover's Distance between the original and rewritten text.
    """

    def __init__(self):
        super(Oruga, self).__init__()
        self.number_of_objectives = 3
        self.number_of_variables = len(index_array)
        self.number_of_constraints = 0

        # BUG FIX: the problem declares 3 objectives, so it must expose
        # 3 directions and 3 labels (previously only 2 were listed).
        self.obj_directions = [self.MINIMIZE, self.MINIMIZE, self.MINIMIZE]
        self.obj_labels = ['f(x)', 'f(y)', 'f(z)']

        self.lower_bound = self.number_of_variables * [-4]
        self.upper_bound = self.number_of_variables * [4]

        # NOTE(review): these assignments patch the FloatSolution *class*
        # globally, not an instance — kept for compatibility; confirm this
        # is still required by the jmetalpy version in use.
        FloatSolution.lower_bound = self.lower_bound
        FloatSolution.upper_bound = self.upper_bound

    def evaluate(self, solution: FloatSolution) -> FloatSolution:
        """Fill in the three objective values for *solution* in place."""
        source = text
        target = obtain_text(solution.variables)

        solution.objectives[2] = float(model.wmdistance(source, target))
        # fitness_func1 zeroes genes without a usable synonym, so the
        # replacement count below must be taken AFTER it runs.
        solution.objectives[1] = fitness_func1(solution.variables)
        solution.objectives[0] = len([1 for i in solution.variables if i >= 1])

        return solution

    def get_name(self):
        return 'Oruga'
def fitness_func1(solution):
    """Return the ARI readability score of *text* rewritten per *solution*.

    Gene semantics: solution[k] < 1 keeps word k unchanged, solution[k] >= 1
    replaces it with the synonym chosen by Synonym().  Positions whose
    precomputed synonym index is <= 0 are forced to 0 first.
    NOTE: this zeroing mutates *solution* in place.
    """
    # Zero out genes for words that have no usable synonym.
    for pos in range(len(index_array)):
        if index_array[pos] <= 0:
            solution[pos] = 0

    rewritten = []
    for pos, token in enumerate(text.split()):
        if solution[pos] < 1:
            rewritten.append(token)
        elif solution[pos] >= 1:
            _, replacement = Synonym(token, solution[pos])
            rewritten.append(replacement)
        else:
            # Unreachable for ordinary floats; kept from the original.
            print ('Error')

    return Readability(listToString(rewritten)).ari().score
class Oruga(FloatProblem):
    """jMetalPy problem: rewrite *text* word-by-word to optimize readability.

    Three objectives, all minimized:
      f(x) - number of words replaced by a synonym,
      f(y) - ARI readability score of the rewritten text,
      f(z) - Word Mover's Distance between the original and rewritten text.
    """

    def __init__(self):
        super(Oruga, self).__init__()
        self.number_of_objectives = 3
        self.number_of_variables = len(index_array)
        self.number_of_constraints = 0

        # BUG FIX: the problem declares 3 objectives, so it must expose
        # 3 directions and 3 labels (previously only 2 were listed).
        self.obj_directions = [self.MINIMIZE, self.MINIMIZE, self.MINIMIZE]
        self.obj_labels = ['f(x)', 'f(y)', 'f(z)']

        self.lower_bound = self.number_of_variables * [-4]
        self.upper_bound = self.number_of_variables * [4]

        # NOTE(review): these assignments patch the FloatSolution *class*
        # globally, not an instance — kept for compatibility; confirm this
        # is still required by the jmetalpy version in use.
        FloatSolution.lower_bound = self.lower_bound
        FloatSolution.upper_bound = self.upper_bound

    def evaluate(self, solution: FloatSolution) -> FloatSolution:
        """Fill in the three objective values for *solution* in place."""
        source = text
        target = obtain_text(solution.variables)

        solution.objectives[2] = float(model.wmdistance(source, target))
        # fitness_func1 zeroes genes without a usable synonym, so the
        # replacement count below must be taken AFTER it runs.
        solution.objectives[1] = fitness_func1(solution.variables)
        solution.objectives[0] = len([1 for i in solution.variables if i >= 1])

        return solution

    def get_name(self):
        return 'Oruga'
def Synonym(word, number):
    """Pick a replacement for *word* from the precomputed word2vec table.

    Returns a pair (index, replacement):
      (-2, word) when *word* has no entry in Dict,
      the last candidate when *number* runs past the end,
      otherwise the candidate at position int(number - 1).

    NOTE(review): the returned index is int(number) while the lookup uses
    int(number - 1) — kept as-is, but the off-by-one looks suspicious.
    """
    candidates = Dict.get(word) or []
    if not candidates:
        return -2, word

    last = len(candidates) - 1
    if number >= len(candidates):
        # Clamp to the final (word, similarity) pair.
        return last, candidates[last][0]
    return int(number), candidates[int(number - 1)][0]
# Cache the word2vec neighbours of every candidate word in main memory,
# so Synonym() never has to query the model during the optimization.
resource = text.split()
Dict = {}
for token in resource:
    # Strip the punctuation glued to the token by the whitespace split.
    token = token.replace(',', '').replace('.', '')

    # Only lower-case words longer than 3 characters are candidates.
    if token[0].isupper() or len(token) <= 3:
        continue

    if token in Dict:
        print ("Processing...Please wait")
        continue

    try:
        neighbours = google_news_vectors.most_similar(token, topn=6)
    except KeyError as e:
        # Out-of-vocabulary word: report it and store nothing.
        print (e)
        neighbours = None
    if neighbours is not None:
        Dict[token] = neighbours
def jaya_multi_objective(pop_size, num_iterations, num_variables, lower_bound, upper_bound):
    """Evolve a random population and return (population, objective_values).

    Every gene moves toward a randomly chosen peer by a random fraction in
    [-1, 1), clipped to the bounds; objectives are evaluated only for the
    final population.

    NOTE(review): despite the name, this is not the canonical Jaya update —
    no best/worst individuals are used and every move is accepted without
    comparing objective values.  Confirm this is intentional.
    """
    pop = np.random.uniform(lower_bound, upper_bound, (pop_size, num_variables))

    for _ in range(num_iterations):
        nxt = pop.copy()
        for row in range(pop_size):
            for col in range(num_variables):
                peer = np.random.randint(pop_size)  # may pick `row` itself
                step = np.random.uniform(-1, 1) * (pop[peer, col] - pop[row, col])
                nxt[row, col] = np.clip(pop[row, col] + step, lower_bound, upper_bound)
        pop = nxt

    # Score the final population only (no selection happened along the way).
    scores = np.array([multi_objective(ind) for ind in pop])
    return pop, scores
def Synonym(word, number):
    """Pick the *number*-th WordNet lemma for *word*.

    Returns a pair (index, replacement):
      (-2, word) when WordNet has no lemmas for the word,
      the last lemma when *number* runs past the end,
      otherwise the lemma at position int(number - 1).

    NOTE(review): the returned index is int(number) while the lookup uses
    int(number - 1) — kept as-is for compatibility.
    """
    lemmas = [lm.name() for syn in wordnet.synsets(word) for lm in syn.lemmas()]
    if not lemmas:
        return -2, word

    last = len(lemmas) - 1
    if number >= len(lemmas):
        # Clamp to the final lemma.
        return last, lemmas[last]
    return int(number), lemmas[int(number - 1)]
class Oruga(FloatProblem):
    """jMetalPy problem: rewrite *text* word-by-word to optimize readability.

    Three objectives, all minimized:
      f(x) - number of words replaced by a synonym,
      f(y) - Flesch-Kincaid readability score of the rewritten text,
      f(z) - Word Mover's Distance between the original and rewritten text.
    """

    def __init__(self):
        super(Oruga, self).__init__()
        self.number_of_objectives = 3
        self.number_of_variables = len(index_array)
        self.number_of_constraints = 0

        self.obj_directions = [self.MINIMIZE, self.MINIMIZE, self.MINIMIZE]
        self.obj_labels = ['f(x)', 'f(y)', 'f(z)']

        self.lower_bound = self.number_of_variables * [-4]
        self.upper_bound = self.number_of_variables * [4]

        # NOTE(review): these assignments patch the FloatSolution *class*
        # globally, not an instance — kept for compatibility; confirm this
        # is still required by the jmetalpy version in use.
        FloatSolution.lower_bound = self.lower_bound
        FloatSolution.upper_bound = self.upper_bound

    def evaluate(self, solution: FloatSolution) -> FloatSolution:
        """Fill in the three objective values for *solution* in place."""
        rewritten = obtain_text(solution.variables)

        solution.objectives[2] = float(model.wmdistance(text, rewritten))
        # fitness_func1 zeroes genes without a usable synonym, so the
        # replacement count below must be taken AFTER it runs.
        solution.objectives[1] = fitness_func1(solution.variables)
        solution.objectives[0] = len([1 for gene in solution.variables if gene >= 1])

        return solution

    def get_name(self):
        return 'Oruga'
Salinity varies widely, being lower near the surface and the mouths of large rivers and higher in the depths of the ocean; however, the relative proportions of dissolved salts vary little across the oceans." 2 | "Austria emerged from the remnants of the Eastern and Hungarian March at the end of the first millennium. Originally a margraviate of Bavaria, it developed into a duchy of the Holy Roman Empire in 1156 and was later made an archduchy in 1453. In the 16th century, Vienna began serving as the empire administrative capital and Austria thus became the heartland of the Habsburg monarchy. After the dissolution of the Holy Roman Empire in 1806, Austria established its own empire, which became a great power and the dominant member of the German Confederation. The defeat in the Austro-Prussian War of 1866 led to the end of the Confederation and paved the way for the establishment of Austria-Hungary a year later." 3 | "The Mediterranean Sea is a sea connected to the Atlantic Ocean, surrounded by the Mediterranean Basin and almost completely enclosed by land: on the north by Western and Southern Europe and Anatolia, on the south by North Africa, and on the east by the Levant. The Sea has played a central role in the history of Western civilization. Although the Mediterranean is sometimes considered a part of the Atlantic Ocean, it is usually referred to as a separate body of water. Geological evidence indicates that around 5.9 million years ago, the Mediterranean was cut off from the Atlantic and was partly or completely desiccated over a period of some 600,000 years during the Messinian salinity crisis before being refilled by the Zanclean flood about 5.3 million years ago." 4 | "Real Madrid Club de Futbol, meaning Royal Madrid Football Club, commonly referred to as Real Madrid, is a Spanish professional football club based in Madrid. Founded in 1902 as Madrid Football Club, the club has traditionally worn a white home kit since its inception. 
The honorific title real is Spanish for Royal and was bestowed to the club by King Alfonso XIII in 1920 together with the royal crown in the emblem. Real Madrid have played their home matches in the Santiago Bernabeu Stadium in downtown Madrid since 1947. Unlike most European sporting entities, Real Madrid members (socios) have owned and operated the club throughout its history." 5 | "A programming language is a system of notation for writing computer programs. Most programming languages are text-based formal languages, but they may also be graphical. They are a kind of computer language. The description of a programming language is usually split into the two components of syntax (form) and semantics (meaning), which are usually defined by a formal language. Some languages are defined by a specification document (for example, the C programming language is specified by an ISO Standard) while other languages (such as Perl) have a dominant implementation that is treated as a reference. Some languages have both, with the basic language defined by a standard and extensions taken from the dominant implementation being common." 6 | "Niagara Falls is a group of three waterfalls at the southern end of Niagara Gorge, spanning the border between the province of Ontario in Canada and the state of New York in the United States. The largest of the three is Horseshoe Falls, which straddles the international border of the two countries. It is also known as the Canadian Falls. The smaller American Falls and Bridal Veil Falls lie within the United States. Bridal Veil Falls is separated from Horseshoe Falls by Goat Island and from American Falls by Luna Island, with both islands situated in New York. Formed by the Niagara River, which drains Lake Erie into Lake Ontario, the combined falls have the highest flow rate of any waterfall in North America that has a vertical drop of more than 50 m (160 ft). 
During peak daytime tourist hours, more than 168,000 m3 (5.9 million cu ft) of water goes over the crest of the falls every minute." 7 | "Big data refers to data sets that are too large or complex to be dealt with by traditional data-processing application software. Data with many fields (rows) offer greater statistical power, while data with higher complexity (more attributes or columns) may lead to a higher false discovery rate. Big data analysis challenges include capturing data, data storage, data analysis, search, sharing, transfer, visualization, querying, updating, information privacy, and data source. Big data was originally associated with three key concepts volume, variety, and velocity. The analysis of big data presents challenges in sampling, and thus previously allowing for only observations and sampling. Thus a fourth concept, veracity, refers to the quality or insightfulness of the data." 8 | "Tumacacori is the site of Mission San José de Tumacácori, a Franciscan mission that was built in the late 18th century. It takes its name from an earlier mission site founded by Father Eusebio Kino in 1691, which is on the east side of the Santa Cruz River, south of the national park. This Kino-period mission was founded at an extant native or Sobaipuri settlement and represents the first mission in southern Arizona, but not the first mission in Arizona. The remains of the native settlement are still extant and have been investigated and reported on by archaeologist Deni Seymour. The later Franciscan mission, which is now a ruin preserved as Tumacácori National Historical Park, was never rebuilt after being abandoned after repeated Apache raids in the 19th century that killed farmers and ranchers in the area and put a stop to the growth of the area economy." 
9 | "The 2020 Belgian Super Cup was a football match that was planned to be played in late July or early August 2020, as opener of the 2020–21 Belgian football season, between the winners of the 2018–19 Belgian First Division A and the winners of the 2018–19 Belgian Cup, but was cancelled due to the COVID-19 pandemic in Belgium. Due to the COVID-19 outbreak, all football in Belgium was cancelled from mid-March until the end of July. While the decision was made to discontinue the league, awarding the title to league leaders Club Brugge and the entry tickets into the UEFA competitions based on finishing positions, the 2020 Belgian Cup Final was not cancelled but instead rescheduled to be played on 1 August 2020, instead of organizing the 2020 Belgian Super Cup." 10 | "Mapagala fortress was an ancient fortified complex of the Anuradhapura Kingdom long before Kasyapa I built his city, Sigiriya. It is located to the South of Sigiriya and closer to Sigiriya tank. It was built by using unshaped boulders to about 20 ft high. Each stone is broad and thick and some of them are about 10 ft high and about 4 ft long. It is believed that it was built before the time of usage of metal tools. Arthur Maurice Hocart noted that cyclopean style stone walls were used for the fortress, and square hammered stones were used for the ramparts of the citadel. However, his note suggests metal (iron) tools were used for construction. Excavations work in this areas found a few stone forges, which proved the claim on the usage of metal tools." 
def calculate_hypervolume(pareto_front, ref_point):
    """Hypervolume (to be maximized) of a minimization front w.r.t. ref_point.

    Works for any number of objectives via recursive dimension sweep
    ("slicing"): the front is swept along the first objective and each
    slab contributes width * hypervolume of the remaining objectives.

    Bug fix: the previous implementation only looked at objectives [0]
    and [1], so for the 3-objective fronts produced by this script the
    third objective was silently ignored.

    Args:
        pareto_front: iterable of objective vectors (all minimized).
        ref_point: reference point; points that do not dominate it
            contribute no volume and are discarded.

    Returns:
        float: dominated hypervolume (0.0 for an empty/irrelevant front).
    """
    # Keep only points at least as good as the reference in every objective.
    pts = [list(p) for p in pareto_front
           if all(a <= r for a, r in zip(p, ref_point))]
    if not pts:
        return 0.0
    return _hv_slice(pts, list(ref_point))


def _hv_slice(points, ref):
    """Recursive slicing step: exact hypervolume of points w.r.t. ref."""
    if len(ref) == 1:
        # 1-D base case: "volume" is the gap to the best coordinate.
        return ref[0] - min(p[0] for p in points)
    points = sorted(points, key=lambda p: p[0])
    volume = 0.0
    for i, p in enumerate(points):
        # Slab between this point and the next one (or the reference).
        right = points[i + 1][0] if i + 1 < len(points) else ref[0]
        width = right - p[0]
        if width > 0:
            # Every point to the left of the slab can dominate inside it;
            # dominated points are harmless (min/union handles them).
            volume += width * _hv_slice([q[1:] for q in points[:i + 1]], ref[1:])
    return volume
# Build the per-word search space for the optimizer:
#   text_array[i]  -- the word shown for slot i after probing Synonym(word, 6)
#   index_array[i] -- value returned by Synonym for slot i (clamped synonym
#                     index, or -2 when no synonym exists), 0 for slots that
#                     are frozen (capitalized or <= 3 characters).
text_array = []
index_array = []

res = text.split()
for i in res:
    # flag remembers punctuation stripped off the token so it can be
    # re-attached after the synonym lookup.
    flag = 0
    if ',' in i:
        i = i.replace(',', '')
        flag = 1
    if '.' in i:
        i = i.replace('.', '')
        flag = 2

    # Only lower-case words longer than 3 chars take part in the search.
    if (not i[0].isupper() and len(i) > 3):
        number, word = Synonym(i,6)
        text_array.append (word)
        index_array.append (number)
    else:
        text_array.append (i)
        index_array.append (0)

    # Re-attach the punctuation that was stripped above.
    # NOTE(review): a token containing both ',' and '.' ends with flag == 2,
    # so the comma would be lost -- latent with the current input text,
    # but confirm before reusing with other texts.
    if flag == 1:
        cad = text_array[-1]
        text_array.pop()
        cad = cad + str(',')
        text_array.append (cad)
        flag = 0
    if flag == 2:
        cad = text_array[-1]
        text_array.pop()
        cad = cad + str('.')
        text_array.append (cad)
        flag = 0
def cuckoo_search_multiobjective(bounds, generations, population_size, pa):
    """Multi-objective cuckoo search over box-constrained real vectors.

    Evolves population_size candidates for `generations` iterations; with
    probability `pa` a new nest is produced by a Levy flight around a
    member of the better half, otherwise it is drawn uniformly at random.
    Returns the non-dominated individuals of the final population, with
    dominance compared on their objective vectors.

    Bug fixes vs. the previous version:
      * non-dominance was filtered on the decision variables instead of
        the objective values returned by evaluate();
      * the dominance test was inverted, so dominated points were kept
        while their dominators were discarded.

    Note: sorting by the evaluate() tuple ranks solutions
    lexicographically (first objective first); kept as-is.
    """
    population = [generate_random_solution(bounds) for _ in range(population_size)]

    for _ in range(generations):
        # Rank by the objective tuple and keep the better half as nests.
        population.sort(key=evaluate)
        new_population = population[:population_size//2]

        for _ in range(population_size - population_size//2):
            if random.random() < pa:
                # Levy flight around a surviving nest, steered by a random
                # member of the better half, clipped back into the box.
                selected_cuckoo = random.choice(new_population)
                cuckoo = [x + levy_flight(1.5) * (x - y) for x, y in zip(selected_cuckoo, population[random.randint(0, population_size//2-1)])]
                cuckoo = np.clip(cuckoo, bounds[:, 0], bounds[:, 1])
                new_population.append(cuckoo)
            else:
                new_population.append(generate_random_solution(bounds))

        population = new_population

    population.sort(key=evaluate)

    def _dominates(a, b):
        """True iff objective vector a Pareto-dominates b (minimization)."""
        return all(x <= y for x, y in zip(a, b)) and any(x < y for x, y in zip(a, b))

    # Evaluate each individual once, then keep only the non-dominated ones.
    scored = [(ind, evaluate(ind)) for ind in population]
    front = []
    front_objs = []
    for ind, obj in scored:
        if any(_dominates(f, obj) for f in front_objs):
            continue  # an archived solution is strictly better everywhere
        keep = [k for k, f in enumerate(front_objs) if not _dominates(obj, f)]
        front = [front[k] for k in keep] + [ind]
        front_objs = [front_objs[k] for k in keep] + [obj]
    return front
def listToString(s):
    """Render the token list *s* as a sentence-like string.

    Each element is stringified and followed by a single space (so the
    result keeps one trailing space), spaces before commas are removed,
    and underscores inside multi-word terms become spaces.
    """
    joined = "".join(str(token) + " " for token in s)
    return joined.replace(" ,", ",").replace("_", " ")
def fitness_func1(solution):
    """Readability objective: ARI score of the text encoded by *solution*.

    Rebuilds the text with the synonym substitutions selected by the
    genes (>= 1 means "use the gene-th synonym") and scores it with the
    Automated Readability Index.

    NOTE(review): mutates *solution* in place -- genes whose slot has no
    usable synonym (index_array[a] <= 0) are forced to 0, and the change
    is visible to the caller's array. Confirm callers tolerate this.
    """
    # Debug trace of the candidate being evaluated.
    print (solution)

    #preprocessing: zero out genes of slots that have no synonyms
    a = 0
    for i in index_array:
        if index_array[a] <= 0:
            solution[a] = 0
        a += 1

    # Rebuild the text: gene >= 1 selects a synonym, gene < 1 keeps the word.
    res2 = text.split()
    text_converted = []
    index=0
    for i in res2:
        if solution[index] < 1:
            text_converted.append (i)
        elif solution[index] >= 1:
            number, word = Synonym(i,solution[index])
            text_converted.append (word)
        else:
            print ('Error')
        index += 1

    result = listToString(text_converted)
    r = Readability(result)
    # Automated Readability Index of the candidate text.
    return r.ari().score
def obtain_text (solution):
    """Materialize the candidate text encoded by *solution*.

    Slots whose gene is >= 1 are replaced by the chosen synonym, rendered
    in UPPER CASE so substitutions are easy to spot; every other slot
    keeps the original word.
    """
    words = []
    for gene, original_word in zip(solution, text.split()):
        if gene < 1:
            words.append(original_word)
        elif gene >= 1:
            _, replacement = Synonym(original_word, gene)
            words.append(replacement.upper())
        else:
            print ('Error')
    return listToString(words)
def jaya_multi_objective(pop_size, num_iterations, num_variables, lower_bound, upper_bound):
    """Population-based search returning the final population and its objectives.

    Each variable of each individual is moved toward/away from the same
    variable of a randomly chosen peer by a U(-1, 1) step, then clipped
    to [lower_bound, upper_bound].

    NOTE(review): canonical Jaya moves solutions toward the best and away
    from the worst individual; this variant uses a random peer and never
    performs fitness-based acceptance, so multi_objective() is evaluated
    only once, on the final population. Confirm the simplification is
    intended.

    Returns:
        (population, objective_values): array of shape
        (pop_size, num_variables) and the array of objective tuples.
    """
    population = np.random.uniform(lower_bound, upper_bound, (pop_size, num_variables))

    for _ in range(num_iterations):
        new_population = population.copy()

        for i in range(pop_size):
            for j in range(num_variables):
                # Random peer (may be i itself) steers the step direction.
                rand_idx = np.random.randint(pop_size)
                new_value = population[i, j] + np.random.uniform(-1, 1) * (population[rand_idx, j] - population[i, j])
                new_value = np.clip(new_value, lower_bound, upper_bound)
                new_population[i, j] = new_value

        population = new_population

    # Calculate the objective values for each individual in the final population
    objective_values = np.array([multi_objective(individual) for individual in population])
    return population, objective_values
def calculate_hypervolume(pareto_front, ref_point):
    """Hypervolume (to be maximized) of a minimization front w.r.t. ref_point.

    Works for any number of objectives via recursive dimension sweep
    ("slicing"): the front is swept along the first objective and each
    slab contributes width * hypervolume of the remaining objectives.

    Bug fix: the previous implementation only looked at objectives [0]
    and [1], so for the 3-objective fronts produced by this script the
    third objective was silently ignored.

    Args:
        pareto_front: iterable of objective vectors (all minimized).
        ref_point: reference point; points that do not dominate it
            contribute no volume and are discarded.

    Returns:
        float: dominated hypervolume (0.0 for an empty/irrelevant front).
    """
    # Keep only points at least as good as the reference in every objective.
    pts = [list(p) for p in pareto_front
           if all(a <= r for a, r in zip(p, ref_point))]
    if not pts:
        return 0.0
    return _hv_slice(pts, list(ref_point))


def _hv_slice(points, ref):
    """Recursive slicing step: exact hypervolume of points w.r.t. ref."""
    if len(ref) == 1:
        # 1-D base case: "volume" is the gap to the best coordinate.
        return ref[0] - min(p[0] for p in points)
    points = sorted(points, key=lambda p: p[0])
    volume = 0.0
    for i, p in enumerate(points):
        # Slab between this point and the next one (or the reference).
        right = points[i + 1][0] if i + 1 < len(points) else ref[0]
        width = right - p[0]
        if width > 0:
            # Every point to the left of the slab can dominate inside it;
            # dominated points are harmless (min/union handles them).
            volume += width * _hv_slice([q[1:] for q in points[:i + 1]], ref[1:])
    return volume
#Creates a dictionary in order to store all the synonyms in main memory
# Dict maps each eligible lower-case word (length > 3) of `text` to the
# top-6 most similar terms from the word2vec model, as (term, score) pairs.
resource = text.split()
Dict = {}
for i in resource:
    # Strip punctuation glued to the token so the model lookup matches.
    if ',' in i:
        i = i.replace(',', '')
    if '.' in i:
        i = i.replace('.', '')

    # Robustness: a token that was pure punctuation is empty after the
    # stripping above and would crash the i[0] check below.
    if not i:
        continue

    if (not i[0].isupper() and len(i) > 3):
        if i in Dict:  # idiomatic membership test (was: i in Dict.keys())
            print ("Processing...Please wait")
        else:
            try:
                # KeyError means the word is out of the model vocabulary.
                synonyms = google_news_vectors.most_similar(i, topn=6)
            except KeyError as e:
                print (e)
                synonyms = None
            if synonyms is not None:
                Dict[i] = synonyms
def obtain_text (solution):
    """Decode *solution* into its candidate text.

    Genes >= 1 swap in the selected synonym (upper-cased so the
    substitutions stand out); genes < 1 keep the original word.
    """
    tokens = text.split()
    rendered = []
    for position, word_in_text in enumerate(tokens):
        gene = solution[position]
        if gene < 1:
            rendered.append(word_in_text)
        elif gene >= 1:
            _, chosen = Synonym(word_in_text, gene)
            rendered.append(chosen.upper())
        else:
            print ('Error')
    return listToString(rendered)
# --- Driver: optimize the Oruga problem with GDE3 and report results ---
max_evaluations = 3000
problem = Oruga()
algorithm = GDE3(
    problem=problem,
    population_size=100,
    cr=0.5,  # differential-evolution crossover rate
    f=0.5,   # differential-evolution scale factor
    termination_criterion=StoppingByEvaluations(max_evaluations)
)

algorithm.run()

# NOTE(review): PEP 8 places imports at the top of the module.
from jmetal.util.solution import get_non_dominated_solutions

front = get_non_dominated_solutions(algorithm.get_result())

# Print number of solutions
print (len(front))

# Print time
print (algorithm.total_computing_time)

# Define the reference point (maximum values for each objective)
ref_point = [30.0, 20.0, 1.0]

# Objective vectors (substitution count, ARI score, WMD) of the front,
# in the order set by Oruga.evaluate.
pareto_front = [[solution.objectives[0], solution.objectives[1], solution.objectives[2]] for solution in front]

print("Pareto Front:")
for point in pareto_front:
    print(point)

# Calculate the hypervolume
hypervolume = calculate_hypervolume(pareto_front, ref_point)
print("Hypervolume:", hypervolume)
def Synonym(word, number):
    """Look up the *number*-th synonym of *word* in the in-memory Dict.

    Dict values are (term, score) pairs from word2vec most_similar, so
    the trailing [0] extracts the term string.

    Returns:
        (index, synonym): (-2, word) when no synonyms are known; when
        number >= len(synonyms) the last synonym is returned with index
        len(synonyms)-1; otherwise synonyms[int(number-1)] is returned
        with index int(number).

    NOTE(review): the two success branches use different conventions --
    the clamped branch pairs index len-1 with item len-1, while the
    normal branch pairs index int(number) with item int(number-1).
    Also, a word with exactly one synonym yields index 0, which the
    fitness preprocessing (index_array[a] <= 0) then treats as "no
    synonym" and freezes. Confirm both are intended.
    """
    synonyms = []

    if (Dict.get(word) is not None):
        synonyms = Dict.get(word)

    if (not synonyms):
        return -2, word
    elif number >= len(synonyms):
        return len(synonyms)-1, synonyms[len(synonyms)-1][0]
    else:
        return int(number), synonyms[int(number-1)][0]
#Creates a dictionary in order to store all the synonyms in main memory
# Dict maps each eligible lower-case word (length > 3) of `text` to the
# top-6 most similar terms from the word2vec model, as (term, score) pairs.
resource = text.split()
Dict = {}
for i in resource:
    # Strip punctuation glued to the token so the model lookup matches.
    if ',' in i:
        i = i.replace(',', '')
    if '.' in i:
        i = i.replace('.', '')

    # Robustness: a token that was pure punctuation is empty after the
    # stripping above and would crash the i[0] check below.
    if not i:
        continue

    if (not i[0].isupper() and len(i) > 3):
        if i in Dict:  # idiomatic membership test (was: i in Dict.keys())
            print ("Processing...Please wait")
        else:
            try:
                # KeyError means the word is out of the model vocabulary.
                synonyms = google_news_vectors.most_similar(i, topn=6)
            except KeyError as e:
                print (e)
                synonyms = None
            if synonyms is not None:
                Dict[i] = synonyms
def obtain_text (solution):
    """Build the text a candidate *solution* stands for.

    A gene >= 1 selects a synonym for its word (shown upper-cased so the
    replacement is visible); any other gene leaves the word untouched.
    """
    substituted = []
    for gene, token in zip(solution, text.split()):
        if gene < 1:
            substituted.append(token)
        elif gene >= 1:
            _, synonym = Synonym(token, gene)
            substituted.append(synonym.upper())
        else:
            print ('Error')
    return listToString(substituted)
# --- Driver: optimize the Oruga problem with NSGA-II and report results ---
max_evaluations = 3000
problem = Oruga()
algorithm = NSGAII(
    problem=problem,
    population_size=20,
    offspring_population_size=30,
    mutation=PolynomialMutation(probability=1.0 / problem.number_of_variables, distribution_index=20),
    crossover=SBXCrossover(probability=1.0, distribution_index=20),
    # Bug fix: the evaluation budget was hard-coded to 800, silently
    # ignoring the max_evaluations variable declared above.
    termination_criterion=StoppingByEvaluations(max_evaluations=max_evaluations)
)

algorithm.run()

# NOTE(review): PEP 8 places imports at the top of the module; kept here
# to leave the module's import order otherwise untouched.
from jmetal.util.solution import get_non_dominated_solutions

front = get_non_dominated_solutions(algorithm.get_result())

# Print number of solutions
print (len(front))

# Print time
print (algorithm.total_computing_time)

# Define the reference point (maximum values for each objective)
ref_point = [30.0, 20.0, 1.0]

# Objective vectors (substitution count, ARI score, WMD) of the front,
# in the order set by Oruga.evaluate.
pareto_front = [[solution.objectives[0], solution.objectives[1], solution.objectives[2]] for solution in front]

print("Pareto Front:")
for point in pareto_front:
    print(point)

# Calculate the hypervolume
hypervolume = calculate_hypervolume(pareto_front, ref_point)
print("Hypervolume:", hypervolume)
Martinez-Gil, "Optimizing Readability Using Genetic Algorithms", arXiv preprint arXiv:2301.00374, 2023 6 | 7 | @author: Jorge Martinez-Gil 8 | """ 9 | import requests 10 | from jmetal.algorithm.multiobjective.gde3 import GDE3 11 | from jmetal.util.termination_criterion import StoppingByEvaluations 12 | from readability import Readability 13 | from nltk.corpus import wordnet 14 | from jmetal.core.problem import FloatProblem 15 | from jmetal.core.solution import FloatSolution 16 | import gensim.downloader as api 17 | import gensim.downloader 18 | 19 | model = api.load('word2vec-google-news-300') 20 | google_news_vectors = gensim.downloader.load('word2vec-google-news-300') 21 | 22 | def calculate_hypervolume(pareto_front, ref_point): 23 | sorted_front = sorted(pareto_front, key=lambda x: x[0]) 24 | hypervolume = 0.0 25 | prev_point = [0.0, ref_point[1]] 26 | 27 | for point in sorted_front: 28 | if point[1] < prev_point[1]: 29 | hypervolume += (prev_point[0] - point[0]) * (prev_point[1] - ref_point[1]) 30 | prev_point = point 31 | 32 | hypervolume += (prev_point[0] - ref_point[0]) * (prev_point[1] - ref_point[1]) 33 | return hypervolume 34 | 35 | def listToString(s): 36 | str1 = "" 37 | for ele in s: 38 | str1 += str(ele) 39 | str1 += " " 40 | 41 | str1 = str1.replace(' ,', ',') 42 | str1 = str1.replace('_', ' ') 43 | return str1 44 | 45 | def Synonym(word, number): 46 | synonyms = [] 47 | 48 | if (Dict.get(word) is not None): 49 | synonyms = Dict.get(word) 50 | 51 | if (not synonyms): 52 | return -2, word 53 | elif number >= len(synonyms): 54 | return len(synonyms)-1, synonyms[len(synonyms)-1][0] 55 | else: 56 | return int(number), synonyms[int(number-1)][0] 57 | 58 | def fitness_func1(solution): 59 | #preprocessing 60 | a = 0 61 | for i in index_array: 62 | if index_array[a] <= 0: 63 | solution[a] = 0 64 | a += 1 65 | 66 | res2 = text.split() 67 | text_converted = [] 68 | index=0 69 | for i in res2: 70 | if solution[index] < 1: 71 | text_converted.append (i) 72 | 
elif solution[index] >= 1: 73 | number, word = Synonym(i,solution[index]) 74 | text_converted.append (word) 75 | else: 76 | print ('Error') 77 | index += 1 78 | 79 | result = listToString(text_converted) 80 | r = Readability(result) 81 | return r.ari().score 82 | 83 | text = 'The sea moderates the climate and has important roles in the water cycle, carbon cycle, and nitrogen cycle. Humans harnessing and studying the sea have been recorded since ancient times, and evidenced well into prehistory, while its modern scientific study is called oceanography. The most abundant solid dissolved in seawater is sodium chloride. The water also contains salts of magnesium, calcium, potassium, and mercury, amongst many other elements, some in minute concentrations. Salinity varies widely, being lower near the surface and the mouths of large rivers and higher in the depths of the ocean; however, the relative proportions of dissolved salts vary little across the oceans.' 84 | 85 | #Creates a dictionary in order to store all the synonyms in main memory 86 | resource = text.split() 87 | Dict = {} 88 | for i in resource: 89 | if ',' in i: 90 | i = i.replace(',', '') 91 | if '.' in i: 92 | i = i.replace('.', '') 93 | 94 | if i in Dict.keys(): 95 | print ("Processing...Please wait") 96 | else: 97 | if (not i[0].isupper() and len(i) > 3): 98 | str1 = 'https://tuna.thesaurus.com/pageData/' + str(i) 99 | req = requests.get(str1) 100 | try: 101 | dict_synonyms = req.json()['data']['definitionData']['definitions'][0]['synonyms'] 102 | except TypeError as e: 103 | print ("Processing...Please wait") 104 | dict_synonyms = None 105 | 106 | if dict_synonyms is not None: 107 | synonyms = [r["term"] for r in dict_synonyms] 108 | if synonyms: 109 | Dict[i] = [] 110 | Dict[i] = synonyms 111 | 112 | text_array = [] 113 | index_array = [] 114 | 115 | res = text.split() 116 | for i in res: 117 | flag = 0 118 | if ',' in i: 119 | i = i.replace(',', '') 120 | flag = 1 121 | if '.' 
in i: 122 | i = i.replace('.', '') 123 | flag = 2 124 | 125 | if (not i[0].isupper() and len(i) > 3): 126 | number, word = Synonym(i,6) 127 | text_array.append (word) 128 | index_array.append (number) 129 | else: 130 | text_array.append (i) 131 | index_array.append (0) 132 | 133 | if flag == 1: 134 | cad = text_array[-1] 135 | text_array.pop() 136 | cad = cad + str(',') 137 | text_array.append (cad) 138 | flag = 0 139 | if flag == 2: 140 | cad = text_array[-1] 141 | text_array.pop() 142 | cad = cad + str('.') 143 | text_array.append (cad) 144 | flag = 0 145 | 146 | def obtain_text (solution): 147 | res2 = text.split() 148 | text_converted = [] 149 | index=0 150 | for i in res2: 151 | if solution[index] < 1: 152 | text_converted.append (i) 153 | elif solution[index] >= 1: 154 | number, word = Synonym(i,solution[index]) 155 | text_converted.append (word.upper()) 156 | else: 157 | print ('Error') 158 | index += 1 159 | 160 | result = listToString(text_converted) 161 | return result 162 | 163 | 164 | class Oruga(FloatProblem): 165 | 166 | def __init__(self): 167 | super(Oruga, self).__init__() 168 | self.number_of_objectives = 3 169 | self.number_of_variables = len(index_array) 170 | self.number_of_constraints = 0 171 | 172 | self.obj_directions = [self.MINIMIZE, self.MINIMIZE] 173 | self.obj_labels = ['f(x)', 'f(y)'] 174 | 175 | self.lower_bound = self.number_of_variables * [-4] 176 | self.upper_bound = self.number_of_variables * [4] 177 | 178 | FloatSolution.lower_bound = self.lower_bound 179 | FloatSolution.upper_bound = self.upper_bound 180 | 181 | def evaluate(self, solution: FloatSolution) -> FloatSolution: 182 | 183 | source = text 184 | target = obtain_text(solution.variables) 185 | 186 | solution.objectives[2] = float (model.wmdistance(source, target)) 187 | solution.objectives[1] = fitness_func1(solution.variables) 188 | solution.objectives[0] = len([1 for i in solution.variables if i >= 1]) 189 | 190 | return solution 191 | 192 | 193 | def get_name(self): 
194 | return 'Oruga' 195 | 196 | max_evaluations = 3000 197 | problem = Oruga() 198 | algorithm = GDE3( 199 | problem=problem, 200 | population_size=100, 201 | cr=0.5, 202 | f=0.5, 203 | termination_criterion=StoppingByEvaluations(max_evaluations) 204 | ) 205 | 206 | algorithm.run() 207 | 208 | from jmetal.util.solution import get_non_dominated_solutions 209 | 210 | front = get_non_dominated_solutions(algorithm.get_result()) 211 | 212 | # Print number of solutions 213 | print (len(front)) 214 | 215 | # Print time 216 | print (algorithm.total_computing_time) 217 | 218 | # Define the reference point (maximum values for each objective) 219 | ref_point = [30.0, 20.0, 1.0] 220 | 221 | pareto_front = [[solution.objectives[0], solution.objectives[1], solution.objectives[2]] for solution in front] 222 | 223 | print("Pareto Front:") 224 | for point in pareto_front: 225 | print(point) 226 | 227 | # Calculate the hypervolume 228 | hypervolume = calculate_hypervolume(pareto_front, ref_point) 229 | print("Hypervolume:", hypervolume) -------------------------------------------------------------------------------- /comparison/word2vec/pso.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ORUGA: Optimizing Readability Using Particle Swarm Optimization (PSO) 4 | 5 | [Martinez-Gil2023a] J. 
Martinez-Gil, "Optimizing Readability Using Genetic Algorithms", arXiv preprint arXiv:2301.00374, 2023 6 | 7 | @author: Jorge Martinez-Gil 8 | """ 9 | 10 | from jmetal.algorithm.multiobjective.smpso import SMPSO 11 | from jmetal.operator import PolynomialMutation 12 | from jmetal.util.archive import CrowdingDistanceArchive 13 | from jmetal.util.termination_criterion import StoppingByEvaluations 14 | from jmetal.core.problem import FloatProblem 15 | from jmetal.core.solution import FloatSolution 16 | from jmetal.util.solution import get_non_dominated_solutions 17 | from nltk.corpus import wordnet 18 | from readability import Readability 19 | import gensim.downloader as api 20 | import gensim.downloader 21 | 22 | model = api.load('word2vec-google-news-300') 23 | google_news_vectors = gensim.downloader.load('word2vec-google-news-300') 24 | 25 | def calculate_hypervolume(pareto_front, ref_point): 26 | sorted_front = sorted(pareto_front, key=lambda x: x[0]) 27 | hypervolume = 0.0 28 | prev_point = [0.0, ref_point[1]] 29 | 30 | for point in sorted_front: 31 | if point[1] < prev_point[1]: 32 | hypervolume += (prev_point[0] - point[0]) * (prev_point[1] - ref_point[1]) 33 | prev_point = point 34 | 35 | hypervolume += (prev_point[0] - ref_point[0]) * (prev_point[1] - ref_point[1]) 36 | return hypervolume 37 | 38 | 39 | 40 | def listToString(s): 41 | str1 = "" 42 | for ele in s: 43 | str1 += str(ele) 44 | str1 += " " 45 | 46 | str1 = str1.replace(' ,', ',') 47 | str1 = str1.replace('_', ' ') 48 | return str1 49 | 50 | def Synonym(word, number): 51 | synonyms = [] 52 | 53 | if (Dict.get(word) is not None): 54 | synonyms = Dict.get(word) 55 | 56 | if (not synonyms): 57 | return -2, word 58 | elif number >= len(synonyms): 59 | return len(synonyms)-1, synonyms[len(synonyms)-1][0] 60 | else: 61 | return int(number), synonyms[int(number-1)][0] 62 | 63 | def fitness_func1(solution): 64 | # preprocessing 65 | a = 0 66 | for i in index_array: 67 | if index_array[a] <= 0: 68 | 
solution[a] = 0 69 | a += 1 70 | 71 | res2 = text.split() 72 | text_converted = [] 73 | index=0 74 | for i in res2: 75 | if solution[index] < 1: 76 | text_converted.append (i) 77 | elif solution[index] >= 1: 78 | number, word = Synonym(i,solution[index]) 79 | text_converted.append (word) 80 | else: 81 | print ('Error') 82 | index += 1 83 | 84 | result = listToString(text_converted) 85 | r = Readability(result) 86 | return r.flesch_kincaid().score 87 | 88 | text = 'The sea moderates the climate and has important roles in the water cycle, carbon cycle, and nitrogen cycle. Humans harnessing and studying the sea have been recorded since ancient times, and evidenced well into prehistory, while its modern scientific study is called oceanography. The most abundant solid dissolved in seawater is sodium chloride. The water also contains salts of magnesium, calcium, potassium, and mercury, amongst many other elements, some in minute concentrations. Salinity varies widely, being lower near the surface and the mouths of large rivers and higher in the depths of the ocean; however, the relative proportions of dissolved salts vary little across the oceans.' 89 | 90 | #Creates a dictionary in order to store all the synonyms in main memory 91 | resource = text.split() 92 | Dict = {} 93 | for i in resource: 94 | if ',' in i: 95 | i = i.replace(',', '') 96 | if '.' in i: 97 | i = i.replace('.', '') 98 | 99 | if (not i[0].isupper() and len(i) > 3): 100 | if i in Dict.keys(): 101 | print ("Processing...Please wait") 102 | else: 103 | try: 104 | synonyms = google_news_vectors.most_similar(i, topn=6) 105 | except KeyError as e: 106 | print (e) 107 | synonyms = None 108 | if synonyms is not None: 109 | Dict[i] = [] 110 | Dict[i] = synonyms 111 | 112 | text_array = [] 113 | index_array = [] 114 | 115 | res = text.split() 116 | for i in res: 117 | flag = 0 118 | if ',' in i: 119 | i = i.replace(',', '') 120 | flag = 1 121 | if '.' 
in i: 122 | i = i.replace('.', '') 123 | flag = 2 124 | 125 | if (not i[0].isupper() and len(i) > 3): 126 | number, word = Synonym(i,6) 127 | text_array.append (word) 128 | index_array.append (number) 129 | else: 130 | text_array.append (i) 131 | index_array.append (0) 132 | 133 | if flag == 1: 134 | cad = text_array[-1] 135 | text_array.pop() 136 | cad = cad + str(',') 137 | text_array.append (cad) 138 | flag = 0 139 | if flag == 2: 140 | cad = text_array[-1] 141 | text_array.pop() 142 | cad = cad + str('.') 143 | text_array.append (cad) 144 | flag = 0 145 | 146 | def obtain_text (solution): 147 | res2 = text.split() 148 | text_converted = [] 149 | index=0 150 | for i in res2: 151 | if solution[index] < 1: 152 | text_converted.append (i) 153 | elif solution[index] >= 1: 154 | number, word = Synonym(i,solution[index]) 155 | text_converted.append (word.upper()) 156 | else: 157 | print ('Error') 158 | index += 1 159 | 160 | result = listToString(text_converted) 161 | return result 162 | 163 | 164 | class Oruga(FloatProblem): 165 | 166 | def __init__(self): 167 | super(Oruga, self).__init__() 168 | self.number_of_objectives = 3 169 | self.number_of_variables = len(index_array) 170 | self.number_of_constraints = 0 171 | 172 | self.obj_directions = [self.MINIMIZE, self.MINIMIZE, self.MINIMIZE] 173 | self.obj_labels = ['f(x)', 'f(y)', 'f(z)'] 174 | 175 | self.lower_bound = self.number_of_variables * [-4] 176 | self.upper_bound = self.number_of_variables * [4] 177 | 178 | FloatSolution.lower_bound = self.lower_bound 179 | FloatSolution.upper_bound = self.upper_bound 180 | 181 | def evaluate(self, solution: FloatSolution) -> FloatSolution: 182 | 183 | source = text 184 | target = obtain_text(solution.variables) 185 | 186 | solution.objectives[2] = float(model.wmdistance(source, target)) 187 | solution.objectives[1] = fitness_func1(solution.variables) 188 | solution.objectives[0] = len([1 for i in solution.variables if i >= 1]) 189 | 190 | return solution 191 | 192 | def 
get_name(self): 193 | return 'Oruga' 194 | 195 | problem = Oruga() 196 | 197 | max_evaluations = 100 198 | algorithm = SMPSO( 199 | problem=problem, 200 | swarm_size=100, 201 | mutation=PolynomialMutation(probability=1.0 / problem.number_of_variables, distribution_index=20), 202 | leaders=CrowdingDistanceArchive(100), 203 | termination_criterion=StoppingByEvaluations(max_evaluations) 204 | ) 205 | 206 | algorithm.run() 207 | solutions = algorithm.get_result() 208 | front = get_non_dominated_solutions(solutions) 209 | 210 | # Print number of solutions 211 | print (len(front)) 212 | 213 | # Print time 214 | print (algorithm.total_computing_time) 215 | 216 | # Define the reference point (maximum values for each objective) 217 | ref_point = [30.0, 20.0, 1.0] 218 | 219 | pareto_front = [[solution.objectives[0], solution.objectives[1], solution.objectives[2]] for solution in front] 220 | 221 | print("Pareto Front:") 222 | for point in pareto_front: 223 | print(point) 224 | 225 | # Calculate the hypervolume 226 | hypervolume = calculate_hypervolume(pareto_front, ref_point) 227 | print("Hypervolume:", hypervolume) 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | -------------------------------------------------------------------------------- /comparison/wordnet/tlbo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from readability import Readability 3 | from nltk.corpus import wordnet 4 | import random 5 | import gensim.downloader as api 6 | import nltk 7 | from nltk.corpus import wordnet 8 | from deap import base, creator, tools 9 | 10 | model = api.load('word2vec-google-news-300') 11 | 12 | def calculate_hypervolume(pareto_front, ref_point): 13 | sorted_front = sorted(pareto_front, key=lambda x: x[0]) 14 | hypervolume = 0.0 15 | prev_point = [0.0, ref_point[1]] 16 | 17 | for point in sorted_front: 18 | if point[1] < prev_point[1]: 19 | hypervolume += (prev_point[0] - point[0]) * (prev_point[1] - 
ref_point[1]) 20 | prev_point = point 21 | 22 | hypervolume += (prev_point[0] - ref_point[0]) * (prev_point[1] - ref_point[1]) 23 | return hypervolume 24 | 25 | def listToString(s): 26 | str1 = "" 27 | for ele in s: 28 | str1 += str(ele) 29 | str1 += " " 30 | 31 | str1 = str1.replace(' ,', ',') 32 | str1 = str1.replace('_', ' ') 33 | return str1 34 | 35 | def Synonym(word, number): 36 | synonyms = [] 37 | for syn in wordnet.synsets(word): 38 | for lm in syn.lemmas(): 39 | synonyms.append(lm.name()) 40 | 41 | if (not synonyms): 42 | return -2, word 43 | elif number >= len(synonyms): 44 | return len(synonyms)-1, synonyms[len(synonyms)-1] 45 | else: 46 | return int(number), synonyms[int(number-1)] 47 | 48 | def fitness_func1(solution): 49 | print (solution) 50 | 51 | #preprocessing 52 | a = 0 53 | for i in index_array: 54 | if index_array[a] <= 0: 55 | solution[a] = 0 56 | a += 1 57 | 58 | res2 = text.split() 59 | text_converted = [] 60 | index=0 61 | for i in res2: 62 | if solution[index] < 1: 63 | text_converted.append (i) 64 | elif solution[index] >= 1: 65 | number, word = Synonym(i,solution[index]) 66 | text_converted.append (word) 67 | else: 68 | print ('Error') 69 | index += 1 70 | 71 | result = listToString(text_converted) 72 | r = Readability(result) 73 | return r.ari().score 74 | 75 | text = 'The sea moderates the climate and has important roles in the water cycle, carbon cycle, and nitrogen cycle. Humans harnessing and studying the sea have been recorded since ancient times, and evidenced well into prehistory, while its modern scientific study is called oceanography. The most abundant solid dissolved in seawater is sodium chloride. The water also contains salts of magnesium, calcium, potassium, and mercury, amongst many other elements, some in minute concentrations. 
Salinity varies widely, being lower near the surface and the mouths of large rivers and higher in the depths of the ocean; however, the relative proportions of dissolved salts vary little across the oceans.' 76 | 77 | text_array = [] 78 | index_array = [] 79 | 80 | res = text.split() 81 | for i in res: 82 | flag = 0 83 | if ',' in i: 84 | i = i.replace(',', '') 85 | flag = 1 86 | if '.' in i: 87 | i = i.replace('.', '') 88 | flag = 2 89 | 90 | if (not i[0].isupper() and len(i) > 3): 91 | number, word = Synonym(i,6) 92 | text_array.append (word) 93 | index_array.append (number) 94 | else: 95 | text_array.append (i) 96 | index_array.append (0) 97 | 98 | if flag == 1: 99 | cad = text_array[-1] 100 | text_array.pop() 101 | cad = cad + str(',') 102 | text_array.append (cad) 103 | flag = 0 104 | if flag == 2: 105 | cad = text_array[-1] 106 | text_array.pop() 107 | cad = cad + str('.') 108 | text_array.append (cad) 109 | flag = 0 110 | 111 | def obtain_text (solution): 112 | res2 = text.split() 113 | text_converted = [] 114 | index=0 115 | for i in res2: 116 | if solution[index] < 1: 117 | text_converted.append (i) 118 | elif solution[index] >= 1: 119 | number, word = Synonym(i,solution[index]) 120 | text_converted.append (word.upper()) 121 | else: 122 | print ('Error') 123 | index += 1 124 | 125 | result = listToString(text_converted) 126 | return result 127 | 128 | # Define your problem's objective function 129 | def objective_function(x): 130 | 131 | source = text 132 | target = obtain_text(x) 133 | 134 | return [len([1 for i in x if i >= 1]), fitness_func1(x), float (model.wmdistance(source, target))] 135 | 136 | # TLBO parameters 137 | population_size = 20 138 | max_generations = 50 139 | dimension = len(index_array) 140 | lower_bound = -4 141 | upper_bound = 4 142 | 143 | # DEAP initialization 144 | creator.create("FitnessMin", base.Fitness, weights=(-1.0, -1.0, -1.0)) 145 | creator.create("Individual", list, fitness=creator.FitnessMin) 146 | 147 | toolbox = 
base.Toolbox() 148 | toolbox.register("attr_float", random.uniform, lower_bound, upper_bound) 149 | toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, n=dimension) 150 | toolbox.register("population", tools.initRepeat, list, toolbox.individual) 151 | 152 | toolbox.register("evaluate", objective_function) 153 | 154 | def teaching_phase(learners, mean_teacher): 155 | for learner in learners: 156 | diff = [mean_teacher[dim] - learner[dim] for dim in range(dimension)] 157 | random_values = [random.random() for _ in range(dimension)] 158 | update_vector = [random_value * diff[dim] for dim, random_value in enumerate(random_values)] 159 | learner[:] = [learner[dim] + update_vector[dim] for dim in range(dimension)] 160 | 161 | toolbox.register("mate", tools.cxBlend, alpha=0.5) 162 | toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.1) 163 | toolbox.register("select", tools.selBest) 164 | 165 | 166 | def main(): 167 | population = toolbox.population(n=population_size) 168 | 169 | # Attach fitness values to the initial population 170 | for ind in population: 171 | ind.fitness.values = toolbox.evaluate(ind) 172 | 173 | pareto_front = [] # Initialize Pareto front 174 | 175 | # TLBO main loop 176 | for generation in range(max_generations): 177 | teachers = toolbox.select(population, k=5) 178 | mean_teacher = [sum(teacher[dim] for teacher in teachers) / len(teachers) for dim in range(dimension)] 179 | 180 | learners = population[:] 181 | teaching_phase(learners, mean_teacher) 182 | 183 | offspring = learners[:] # No genetic operations 184 | 185 | # Attach fitness values to the offspring 186 | for ind in offspring: 187 | ind.fitness.values = toolbox.evaluate(ind) 188 | 189 | for i in range(population_size): 190 | offspring_fitness = offspring[i].fitness.values 191 | for ind in population: 192 | if ind != offspring[i]: 193 | ind_fitness = ind.fitness.values 194 | is_dominated = all(offspring_fitness[dim] <= ind_fitness[dim] 
for dim in range(len(offspring_fitness))) 195 | if not is_dominated: 196 | if offspring[i] not in pareto_front: 197 | pareto_front.append(offspring[i]) 198 | 199 | final = [] 200 | front = [] 201 | for item in pareto_front: 202 | if item not in front: 203 | front.append(item) 204 | 205 | # Print fitness 206 | for solution in front: 207 | final.append(solution.fitness.values) 208 | print("Fitness:", solution.fitness.values) 209 | 210 | ref_point = [30.0, 20.0, 1.0] 211 | hypervolume = calculate_hypervolume(final, ref_point) 212 | print("Hypervolume:", hypervolume) 213 | 214 | print (len(pareto_front)) 215 | 216 | if __name__ == "__main__": 217 | main() 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | -------------------------------------------------------------------------------- /oruga_wordnet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ORUGA: Optimizing Readability Using Genetic Algorithms 4 | 5 | [Martinez-Gil2023a] J. 
Martinez-Gil, "Optimizing Readability Using Genetic Algorithms", arXiv preprint arXiv:2301.00374, 2023 6 | 7 | @author: Jorge Martinez-Gil 8 | """ 9 | 10 | #print(r.flesch_kincaid().score) 11 | #print(r.flesch().score) 12 | #print(r.gunning_fog()) 13 | #print(r.coleman_liau()) 14 | #print(r.dale_chall()) 15 | #print(r.ari()) 16 | #print(r.linsear_write()) 17 | #print(r.spache()) 18 | 19 | #Coding of individuals 20 | #-2, candidate but not synonym 21 | #-1, special character (if necessary) 22 | #0, not candidate 23 | #1, replaced by 1st option 24 | #2, replaced by 2nd option 25 | #N, replaced by Nth option 26 | 27 | # Modules 28 | import pygad 29 | import language_tool_python 30 | from readability import Readability 31 | from nltk.corpus import wordnet 32 | 33 | text_array = [] 34 | index_array = [] 35 | 36 | #Text 37 | text = 'Niagara Falls is a group of three waterfalls at the southern end of Niagara Gorge, spanning the border between the province of Ontario in Canada and the state of New York in the United States. The largest of the three is Horseshoe Falls, which straddles the international border of the two countries. It is also known as the Canadian Falls. The smaller American Falls and Bridal Veil Falls lie within the United States. Bridal Veil Falls is separated from Horseshoe Falls by Goat Island and from American Falls by Luna Island, with both islands situated in New York. Formed by the Niagara River, which drains Lake Erie into Lake Ontario, the combined falls have the highest flow rate of any waterfall in North America that has a vertical drop of more than 50 m (160 ft). During peak daytime tourist hours, more than 168,000 m3 (5.9 million cu ft) of water goes over the crest of the falls every minute.' 
38 | 39 | r = Readability(text) 40 | initial_score = r.flesch_kincaid().score 41 | 42 | def listToString(s): 43 | str1 = "" 44 | for ele in s: 45 | str1 += str(ele) 46 | str1 += " " 47 | 48 | str1 = str1.replace(' ,', ',') 49 | str1 = str1.replace('_', ' ') 50 | return str1 51 | 52 | def Synonym(word, number): 53 | synonyms = [] 54 | for syn in wordnet.synsets(word): 55 | for lm in syn.lemmas(): 56 | synonyms.append(lm.name()) 57 | 58 | if (not synonyms): 59 | return -2, word 60 | elif number >= len(synonyms): 61 | return len(synonyms)-1, synonyms[len(synonyms)-1] 62 | else: 63 | return int(number), synonyms[int(number-1)] 64 | 65 | def obtain_text (solution): 66 | res2 = text.split() 67 | text_converted = [] 68 | index=0 69 | for i in res2: 70 | if solution[index] < 1: 71 | text_converted.append (i) 72 | elif solution[index] >= 1: 73 | number, word = Synonym(i,solution[index]) 74 | text_converted.append (word.upper()) 75 | else: 76 | print ('Error') 77 | index += 1 78 | 79 | result = listToString(text_converted) 80 | return result 81 | 82 | def correct_mistakes (text): 83 | my_tool = language_tool_python.LanguageTool('en-US') 84 | my_text = text 85 | my_matches = my_tool.check(my_text) 86 | 87 | myMistakes = [] 88 | myCorrections = [] 89 | startPositions = [] 90 | endPositions = [] 91 | 92 | # using the for-loop 93 | for rules in my_matches: 94 | if len(rules.replacements) > 0: 95 | startPositions.append(rules.offset) 96 | endPositions.append(rules.errorLength + rules.offset) 97 | myMistakes.append(my_text[rules.offset : rules.errorLength + rules.offset]) 98 | myCorrections.append(rules.replacements[0]) 99 | 100 | # creating new object 101 | my_NewText = list(my_text) 102 | 103 | # rewriting the correct passage 104 | for n in range(len(startPositions)): 105 | for i in range(len(my_text)): 106 | my_NewText[startPositions[n]] = myCorrections[n] 107 | if (i > startPositions[n] and i < endPositions[n]): 108 | my_NewText[i] = "" 109 | 110 | my_NewText = 
"".join(my_NewText) 111 | 112 | return my_NewText 113 | 114 | def fitness_func(solution, solution_idx): 115 | 116 | #preprocessing 117 | a = 0 118 | for i in index_array: 119 | if index_array[a] <= 0: 120 | solution[a] = 0 121 | a += 1 122 | 123 | res2 = text.split() 124 | text_converted = [] 125 | index=0 126 | for i in res2: 127 | if solution[index] < 1: 128 | text_converted.append (i) 129 | elif solution[index] >= 1: 130 | number, word = Synonym(i,solution[index]) 131 | text_converted.append (word) 132 | else: 133 | print ('Error') 134 | index += 1 135 | 136 | result = listToString(text_converted) 137 | r = Readability(result) 138 | return r.flesch_kincaid().score * -1 139 | 140 | print (text) 141 | res = text.split() 142 | 143 | for i in res: 144 | flag = 0 145 | if ',' in i: 146 | i = i.replace(',', '') 147 | flag = 1 148 | if '.' in i: 149 | i = i.replace('.', '') 150 | flag = 2 151 | 152 | if (not i[0].isupper() and len(i) > 3): 153 | number, word = Synonym(i,6) 154 | text_array.append (word) 155 | index_array.append (number) 156 | else: 157 | text_array.append (i) 158 | index_array.append (0) 159 | 160 | if flag == 1: 161 | cad = text_array[-1] 162 | text_array.pop() 163 | cad = cad + str(',') 164 | text_array.append (cad) 165 | flag = 0 166 | if flag == 2: 167 | cad = text_array[-1] 168 | text_array.pop() 169 | cad = cad + str('.') 170 | text_array.append (cad) 171 | flag = 0 172 | 173 | newText = listToString(text_array) 174 | print(newText) 175 | print(index_array) 176 | 177 | # Parameters for the GA 178 | function_inputs = index_array 179 | num_generations = 100 # Number of generations 180 | num_parents_mating = 10 # Number of solutions to be selected as parents in the mating pool 181 | sol_per_pop = 20 # Number of solutions in the population 182 | num_genes = len(function_inputs) # Number of genes 183 | 184 | # Initialize the GA instance without the 'on_generation' argument 185 | ga_instance = pygad.GA(num_generations=1, # Set to 1 because we are 
controlling the generations manually 186 | num_parents_mating=num_parents_mating, 187 | sol_per_pop=sol_per_pop, 188 | num_genes=num_genes, 189 | fitness_func=fitness_func) 190 | 191 | last_fitness = 0 # Initialize last fitness for comparison 192 | 193 | # Manually iterate through generations 194 | for generation in range(num_generations): 195 | ga_instance.run() # Run GA for one generation 196 | 197 | # Getting the best solution after the current generation 198 | solution, solution_fitness, solution_idx = ga_instance.best_solution() 199 | 200 | print("Generation = {}".format(generation + 1)) 201 | print("Fitness = {}".format(solution_fitness)) 202 | print("Change = {}".format(solution_fitness - last_fitness)) 203 | 204 | last_fitness = solution_fitness # Update the last fitness value 205 | 206 | # At this point, the GA has completed all generations 207 | # You can directly get the best solution details without passing any arguments 208 | solution, solution_fitness, solution_idx = ga_instance.best_solution() 209 | print("Parameters of the best solution : {solution}".format(solution=solution)) 210 | print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) 211 | print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) 212 | 213 | new_text = correct_mistakes(obtain_text(solution)) 214 | rr = Readability(new_text) 215 | print(new_text) 216 | print("Difference " + str(initial_score - rr.flesch_kincaid().score)) 217 | -------------------------------------------------------------------------------- /comparison/web/nsga2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ORUGA: Optimizing Readability Using Genetic Algorithms 4 | 5 | [Martinez-Gil2023a] J. 
Martinez-Gil, "Optimizing Readability Using Genetic Algorithms", arXiv preprint arXiv:2301.00374, 2023 6 | 7 | @author: Jorge Martinez-Gil 8 | """ 9 | 10 | from jmetal.algorithm.multiobjective import NSGAII 11 | from jmetal.operator import SBXCrossover, PolynomialMutation 12 | from jmetal.util.termination_criterion import StoppingByEvaluations 13 | from readability import Readability 14 | from nltk.corpus import wordnet 15 | from jmetal.core.problem import FloatProblem 16 | from jmetal.core.solution import FloatSolution 17 | import gensim.downloader as api 18 | import gensim.downloader 19 | import requests 20 | 21 | model = api.load('word2vec-google-news-300') 22 | google_news_vectors = gensim.downloader.load('word2vec-google-news-300') 23 | 24 | 25 | def calculate_hypervolume(pareto_front, ref_point): 26 | sorted_front = sorted(pareto_front, key=lambda x: x[0]) 27 | hypervolume = 0.0 28 | prev_point = [0.0, ref_point[1]] 29 | 30 | for point in sorted_front: 31 | if point[1] < prev_point[1]: 32 | hypervolume += (prev_point[0] - point[0]) * (prev_point[1] - ref_point[1]) 33 | prev_point = point 34 | 35 | hypervolume += (prev_point[0] - ref_point[0]) * (prev_point[1] - ref_point[1]) 36 | return hypervolume 37 | 38 | def listToString(s): 39 | str1 = "" 40 | for ele in s: 41 | str1 += str(ele) 42 | str1 += " " 43 | 44 | str1 = str1.replace(' ,', ',') 45 | str1 = str1.replace('_', ' ') 46 | return str1 47 | 48 | def Synonym(word, number): 49 | synonyms = [] 50 | 51 | if (Dict.get(word) is not None): 52 | synonyms = Dict.get(word) 53 | 54 | if (not synonyms): 55 | return -2, word 56 | elif number >= len(synonyms): 57 | return len(synonyms)-1, synonyms[len(synonyms)-1][0] 58 | else: 59 | return int(number), synonyms[int(number-1)][0] 60 | 61 | def fitness_func1(solution): 62 | #preprocessing 63 | a = 0 64 | for i in index_array: 65 | if index_array[a] <= 0: 66 | solution[a] = 0 67 | a += 1 68 | 69 | res2 = text.split() 70 | text_converted = [] 71 | index=0 72 | for i in 
res2: 73 | if solution[index] < 1: 74 | text_converted.append (i) 75 | elif solution[index] >= 1: 76 | number, word = Synonym(i,solution[index]) 77 | text_converted.append (word) 78 | else: 79 | print ('Error') 80 | index += 1 81 | 82 | result = listToString(text_converted) 83 | r = Readability(result) 84 | return r.ari().score 85 | 86 | text = 'The sea moderates the climate and has important roles in the water cycle, carbon cycle, and nitrogen cycle. Humans harnessing and studying the sea have been recorded since ancient times, and evidenced well into prehistory, while its modern scientific study is called oceanography. The most abundant solid dissolved in seawater is sodium chloride. The water also contains salts of magnesium, calcium, potassium, and mercury, amongst many other elements, some in minute concentrations. Salinity varies widely, being lower near the surface and the mouths of large rivers and higher in the depths of the ocean; however, the relative proportions of dissolved salts vary little across the oceans.' 87 | 88 | 89 | #Creates a dictionary in order to store all the synonyms in main memory 90 | resource = text.split() 91 | Dict = {} 92 | for i in resource: 93 | if ',' in i: 94 | i = i.replace(',', '') 95 | if '.' 
in i: 96 | i = i.replace('.', '') 97 | 98 | if i in Dict.keys(): 99 | print ("Processing...Please wait") 100 | else: 101 | if (not i[0].isupper() and len(i) > 3): 102 | str1 = 'https://tuna.thesaurus.com/pageData/' + str(i) 103 | req = requests.get(str1) 104 | try: 105 | dict_synonyms = req.json()['data']['definitionData']['definitions'][0]['synonyms'] 106 | except TypeError as e: 107 | print ("Processing...Please wait") 108 | dict_synonyms = None 109 | 110 | if dict_synonyms is not None: 111 | synonyms = [r["term"] for r in dict_synonyms] 112 | if synonyms: 113 | Dict[i] = [] 114 | Dict[i] = synonyms 115 | 116 | text_array = [] 117 | index_array = [] 118 | 119 | res = text.split() 120 | for i in res: 121 | flag = 0 122 | if ',' in i: 123 | i = i.replace(',', '') 124 | flag = 1 125 | if '.' in i: 126 | i = i.replace('.', '') 127 | flag = 2 128 | 129 | if (not i[0].isupper() and len(i) > 3): 130 | number, word = Synonym(i,6) 131 | text_array.append (word) 132 | index_array.append (number) 133 | else: 134 | text_array.append (i) 135 | index_array.append (0) 136 | 137 | if flag == 1: 138 | cad = text_array[-1] 139 | text_array.pop() 140 | cad = cad + str(',') 141 | text_array.append (cad) 142 | flag = 0 143 | if flag == 2: 144 | cad = text_array[-1] 145 | text_array.pop() 146 | cad = cad + str('.') 147 | text_array.append (cad) 148 | flag = 0 149 | 150 | def obtain_text (solution): 151 | res2 = text.split() 152 | text_converted = [] 153 | index=0 154 | for i in res2: 155 | if solution[index] < 1: 156 | text_converted.append (i) 157 | elif solution[index] >= 1: 158 | number, word = Synonym(i,solution[index]) 159 | text_converted.append (word.upper()) 160 | else: 161 | print ('Error') 162 | index += 1 163 | 164 | result = listToString(text_converted) 165 | return result 166 | 167 | 168 | class Oruga(FloatProblem): 169 | 170 | def __init__(self): 171 | super(Oruga, self).__init__() 172 | self.number_of_objectives = 3 173 | self.number_of_variables = len(index_array) 174 
| self.number_of_constraints = 0 175 | 176 | self.obj_directions = [self.MINIMIZE, self.MINIMIZE] 177 | self.obj_labels = ['f(x)', 'f(y)'] 178 | 179 | self.lower_bound = self.number_of_variables * [-4] 180 | self.upper_bound = self.number_of_variables * [4] 181 | 182 | FloatSolution.lower_bound = self.lower_bound 183 | FloatSolution.upper_bound = self.upper_bound 184 | 185 | def evaluate(self, solution: FloatSolution) -> FloatSolution: 186 | 187 | source = text 188 | target = obtain_text(solution.variables) 189 | 190 | solution.objectives[2] = float (model.wmdistance(source, target)) 191 | solution.objectives[1] = fitness_func1(solution.variables) 192 | solution.objectives[0] = len([1 for i in solution.variables if i >= 1]) 193 | 194 | return solution 195 | 196 | 197 | def get_name(self): 198 | return 'Oruga' 199 | 200 | max_evaluations = 3000 201 | problem = Oruga() 202 | algorithm = NSGAII( 203 | problem=problem, 204 | population_size=20, 205 | offspring_population_size=30, 206 | mutation=PolynomialMutation(probability=1.0 / problem.number_of_variables, distribution_index=20), 207 | crossover=SBXCrossover(probability=1.0, distribution_index=20), 208 | termination_criterion=StoppingByEvaluations(max_evaluations=800) 209 | ) 210 | 211 | algorithm.run() 212 | 213 | from jmetal.util.solution import get_non_dominated_solutions 214 | 215 | front = get_non_dominated_solutions(algorithm.get_result()) 216 | 217 | # Print number of solutions 218 | print (len(front)) 219 | 220 | # Print time 221 | print (algorithm.total_computing_time) 222 | 223 | # Define the reference point (maximum values for each objective) 224 | ref_point = [30.0, 20.0, 1.0] 225 | 226 | pareto_front = [[solution.objectives[0], solution.objectives[1], solution.objectives[2]] for solution in front] 227 | 228 | print("Pareto Front:") 229 | for point in pareto_front: 230 | print(point) 231 | 232 | # Calculate the hypervolume 233 | hypervolume = calculate_hypervolume(pareto_front, ref_point) 234 | 
print("Hypervolume:", hypervolume)

# ---------------------------------------------------------------------------
# /comparison/web/pso.py
# ---------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
ORUGA: Optimizing Readability Using Particle Swarm Optimization (PSO)

[Martinez-Gil2023a] J. Martinez-Gil, "Optimizing Readability Using Genetic Algorithms", arXiv preprint arXiv:2301.00374, 2023

@author: Jorge Martinez-Gil
"""

from jmetal.algorithm.multiobjective.smpso import SMPSO
from jmetal.operator import PolynomialMutation
from jmetal.util.archive import CrowdingDistanceArchive
from jmetal.util.termination_criterion import StoppingByEvaluations
from jmetal.core.problem import FloatProblem
from jmetal.core.solution import FloatSolution
from jmetal.util.solution import get_non_dominated_solutions
from nltk.corpus import wordnet
from readability import Readability
import gensim.downloader as api
import gensim.downloader
import requests

# The word2vec model is only needed for the Word Mover's Distance objective.
model = api.load('word2vec-google-news-300')
google_news_vectors = gensim.downloader.load('word2vec-google-news-300')


def calculate_hypervolume(pareto_front, ref_point):
    """Approximate the hypervolume of *pareto_front* w.r.t. *ref_point*.

    NOTE(review): only the first two components of each point take part in
    the sweep; the third objective is ignored — confirm this is intended.
    """
    sorted_front = sorted(pareto_front, key=lambda x: x[0])
    hypervolume = 0.0
    prev_point = [0.0, ref_point[1]]

    for point in sorted_front:
        if point[1] < prev_point[1]:
            hypervolume += (prev_point[0] - point[0]) * (prev_point[1] - ref_point[1])
            prev_point = point

    hypervolume += (prev_point[0] - ref_point[0]) * (prev_point[1] - ref_point[1])
    return hypervolume


def listToString(s):
    """Join the items of *s* with spaces, re-attaching commas and replacing
    underscores (multi-word synonyms) with spaces."""
    str1 = ""
    for ele in s:
        str1 += str(ele)
        str1 += " "

    str1 = str1.replace(' ,', ',')
    str1 = str1.replace('_', ' ')
    return str1


def Synonym(word, number):
    """Return ``(index, synonym)`` for *word*; ``(-2, word)`` when none."""
    synonyms = []

    if Dict.get(word) is not None:
        synonyms = Dict.get(word)

    if not synonyms:
        return -2, word
    elif number >= len(synonyms):
        # BUGFIX: this thesaurus-based variant stores plain strings in Dict,
        # so the previous ``synonyms[...][0]`` returned the first *letter*
        # of the synonym rather than the synonym itself.
        return len(synonyms) - 1, synonyms[len(synonyms) - 1]
    else:
        return int(number), synonyms[int(number - 1)]


def fitness_func1(solution):
    """Objective: Flesch-Kincaid score of the text encoded by *solution*."""
    # Genes without synonym candidates (index <= 0) can never replace.
    for a, idx in enumerate(index_array):
        if idx <= 0:
            solution[a] = 0

    text_converted = []
    for word, gene in zip(text.split(), solution):
        if gene < 1:
            text_converted.append(word)
        else:
            _, replacement = Synonym(word, gene)
            text_converted.append(replacement)

    result = listToString(text_converted)
    r = Readability(result)
    return r.flesch_kincaid().score


text = 'The sea moderates the climate and has important roles in the water cycle, carbon cycle, and nitrogen cycle. Humans harnessing and studying the sea have been recorded since ancient times, and evidenced well into prehistory, while its modern scientific study is called oceanography. The most abundant solid dissolved in seawater is sodium chloride. The water also contains salts of magnesium, calcium, potassium, and mercury, amongst many other elements, some in minute concentrations. Salinity varies widely, being lower near the surface and the mouths of large rivers and higher in the depths of the ocean; however, the relative proportions of dissolved salts vary little across the oceans.'

# Build an in-memory dictionary word -> list of synonyms, queried once from
# the online thesaurus so the optimizer never touches the network afterwards.
resource = text.split()
Dict = {}
for i in resource:
    if ',' in i:
        i = i.replace(',', '')
    if '.' in i:
        i = i.replace('.', '')

    if i in Dict.keys():
        print("Processing...Please wait")
    else:
        if not i[0].isupper() and len(i) > 3:
            str1 = 'https://tuna.thesaurus.com/pageData/' + str(i)
            req = requests.get(str1)
            try:
                dict_synonyms = req.json()['data']['definitionData']['definitions'][0]['synonyms']
            # BUGFIX: a non-JSON response raises ValueError and a missing key
            # raises KeyError/IndexError; catching only TypeError crashed here.
            except (TypeError, ValueError, KeyError, IndexError):
                print("Processing...Please wait")
                dict_synonyms = None

            if dict_synonyms is not None:
                synonyms = [r["term"] for r in dict_synonyms]
                if synonyms:
                    Dict[i] = synonyms

text_array = []
index_array = []

res = text.split()
for i in res:
    flag = 0
    if ',' in i:
        i = i.replace(',', '')
        flag = 1
    if '.' in i:
        i = i.replace('.', '')
        flag = 2

    if not i[0].isupper() and len(i) > 3:
        number, word = Synonym(i, 6)
        text_array.append(word)
        index_array.append(number)
    else:
        text_array.append(i)
        index_array.append(0)

    # Re-attach the punctuation stripped above.
    if flag == 1:
        text_array[-1] = text_array[-1] + ','
    if flag == 2:
        text_array[-1] = text_array[-1] + '.'


def obtain_text(solution):
    """Decode *solution* into text; replacements are upper-cased so they are
    easy to spot in the printed output."""
    text_converted = []
    for word, gene in zip(text.split(), solution):
        if gene < 1:
            text_converted.append(word)
        else:
            _, replacement = Synonym(word, gene)
            text_converted.append(replacement.upper())

    return listToString(text_converted)


class Oruga(FloatProblem):
    """Three-objective problem: (#replacements, readability, WMD)."""

    def __init__(self):
        super(Oruga, self).__init__()
        self.number_of_objectives = 3
        self.number_of_variables = len(index_array)
        self.number_of_constraints = 0

        self.obj_directions = [self.MINIMIZE, self.MINIMIZE, self.MINIMIZE]
        self.obj_labels = ['f(x)', 'f(y)', 'f(z)']

        self.lower_bound = self.number_of_variables * [-4]
        self.upper_bound = self.number_of_variables * [4]

        FloatSolution.lower_bound = self.lower_bound
        FloatSolution.upper_bound = self.upper_bound

    def evaluate(self, solution: FloatSolution) -> FloatSolution:
        source = text
        target = obtain_text(solution.variables)

        solution.objectives[2] = float(model.wmdistance(source, target))
        solution.objectives[1] = fitness_func1(solution.variables)
        solution.objectives[0] = len([1 for i in solution.variables if i >= 1])

        return solution

    def get_name(self):
        return 'Oruga'


problem = Oruga()

max_evaluations = 100
algorithm = SMPSO(
    problem=problem,
    swarm_size=100,
    mutation=PolynomialMutation(probability=1.0 / problem.number_of_variables, distribution_index=20),
    leaders=CrowdingDistanceArchive(100),
    termination_criterion=StoppingByEvaluations(max_evaluations)
)

algorithm.run()
solutions = algorithm.get_result()
front = get_non_dominated_solutions(solutions)

# Number of non-dominated solutions and wall-clock time.
print(len(front))
print(algorithm.total_computing_time)

# Reference point (worst acceptable value for each objective).
ref_point = [30.0, 20.0, 1.0]

pareto_front = [[solution.objectives[0], solution.objectives[1], solution.objectives[2]] for solution in front]

print("Pareto Front:")
for point in pareto_front:
    print(point)

hypervolume = calculate_hypervolume(pareto_front, ref_point)
print("Hypervolume:", hypervolume)
# ---------------------------------------------------------------------------
# /comparison/word2vec/cs.py
# ---------------------------------------------------------------------------
import numpy as np
from readability import Readability
from nltk.corpus import wordnet
import random
import gensim.downloader as api
import gensim.downloader
import time

start_time = time.time()  # Record the starting time

google_news_vectors = gensim.downloader.load('word2vec-google-news-300')
model = api.load('word2vec-google-news-300')
Dict = {}


def calculate_hypervolume(pareto_front, ref_point):
    """Approximate the hypervolume of *pareto_front* w.r.t. *ref_point*.

    NOTE(review): only the first two components of each point take part in
    the sweep; the third objective is ignored — confirm this is intended.
    """
    sorted_front = sorted(pareto_front, key=lambda x: x[0])
    hypervolume = 0.0
    prev_point = [0.0, ref_point[1]]

    for point in sorted_front:
        if point[1] < prev_point[1]:
            hypervolume += (prev_point[0] - point[0]) * (prev_point[1] - ref_point[1])
            prev_point = point

    hypervolume += (prev_point[0] - ref_point[0]) * (prev_point[1] - ref_point[1])
    return hypervolume


def listToString(s):
    """Join the items of *s* with spaces, re-attaching commas and replacing
    underscores (multi-word synonyms) with spaces."""
    str1 = ""
    for ele in s:
        str1 += str(ele)
        str1 += " "

    str1 = str1.replace(' ,', ',')
    str1 = str1.replace('_', ' ')
    return str1


def Synonym(word, number):
    """Return ``(index, synonym)`` for *word*; ``(-2, word)`` when none.

    Dict values here are ``most_similar`` results, i.e. (word, score)
    tuples, so the trailing ``[0]`` extracts the word itself.
    """
    synonyms = []

    if Dict.get(word) is not None:
        synonyms = Dict.get(word)

    if not synonyms:
        return -2, word
    elif number >= len(synonyms):
        return len(synonyms) - 1, synonyms[len(synonyms) - 1][0]
    else:
        return int(number), synonyms[int(number - 1)][0]


def fitness_func1(solution):
    """Objective: ARI readability score of the text encoded by *solution*."""
    print(solution)

    # Genes without synonym candidates (index <= 0) can never replace.
    for a, idx in enumerate(index_array):
        if idx <= 0:
            solution[a] = 0

    text_converted = []
    for word, gene in zip(text.split(), solution):
        if gene < 1:
            text_converted.append(word)
        else:
            _, replacement = Synonym(word, gene)
            text_converted.append(replacement)

    result = listToString(text_converted)
    r = Readability(result)
    return r.ari().score


text = 'The sea moderates the climate and has important roles in the water cycle, carbon cycle, and nitrogen cycle. Humans harnessing and studying the sea have been recorded since ancient times, and evidenced well into prehistory, while its modern scientific study is called oceanography. The most abundant solid dissolved in seawater is sodium chloride. The water also contains salts of magnesium, calcium, potassium, and mercury, amongst many other elements, some in minute concentrations. Salinity varies widely, being lower near the surface and the mouths of large rivers and higher in the depths of the ocean; however, the relative proportions of dissolved salts vary little across the oceans.'

# Build an in-memory dictionary word -> list of (synonym, score) tuples so
# the optimizer never queries the embedding model during the search.
resource = text.split()
Dict = {}
for i in resource:
    if ',' in i:
        i = i.replace(',', '')
    if '.' in i:
        i = i.replace('.', '')

    if not i[0].isupper() and len(i) > 3:
        if i in Dict.keys():
            print("Processing...Please wait")
        else:
            try:
                synonyms = google_news_vectors.most_similar(i, topn=6)
            except KeyError as e:
                print(e)
                synonyms = None
            if synonyms is not None:
                Dict[i] = synonyms

text_array = []
index_array = []

res = text.split()
for i in res:
    flag = 0
    if ',' in i:
        i = i.replace(',', '')
        flag = 1
    if '.' in i:
        i = i.replace('.', '')
        flag = 2

    if not i[0].isupper() and len(i) > 3:
        number, word = Synonym(i, 6)
        text_array.append(word)
        index_array.append(number)
    else:
        text_array.append(i)
        index_array.append(0)

    # Re-attach the punctuation stripped above.
    if flag == 1:
        text_array[-1] = text_array[-1] + ','
    if flag == 2:
        text_array[-1] = text_array[-1] + '.'


def obtain_text(solution):
    """Decode *solution* into text; replacements are upper-cased so they are
    easy to spot in the printed output."""
    text_converted = []
    for word, gene in zip(text.split(), solution):
        if gene < 1:
            text_converted.append(word)
        else:
            _, replacement = Synonym(word, gene)
            text_converted.append(replacement.upper())

    return listToString(text_converted)


def evaluate(x):
    """Return the objective tuple (#replacements, readability, WMD)."""
    source = text
    target = obtain_text(x)

    return len([1 for i in x if i >= 1]), fitness_func1(x), float(model.wmdistance(source, target))


def generate_random_solution(bounds):
    """Draw a uniform random individual within *bounds*."""
    return [random.uniform(bounds[i][0], bounds[i][1]) for i in range(len(bounds))]


def levy_flight(beta):
    """Random step length for the cuckoo move (Levy-flight style)."""
    return np.power((1.0 / np.random.gamma(1.0 + beta)), 1.0 / beta)


def cuckoo_search_multiobjective(bounds, generations, population_size, pa):
    """Cuckoo search over *bounds*; returns the final non-dominated set.

    NOTE(review): the per-generation sort is lexicographic over the
    objective tuple, which is a crude scalarization for a multiobjective
    search — kept as in the original.
    """
    dim = len(bounds)
    population = [generate_random_solution(bounds) for _ in range(population_size)]

    for gen in range(generations):
        population.sort(key=lambda x: evaluate(x))
        new_population = population[:population_size // 2]

        for _ in range(population_size - population_size // 2):
            if random.random() < pa:
                selected_cuckoo = random.choice(new_population)
                cuckoo = [x + levy_flight(1.5) * (x - y) for x, y in zip(selected_cuckoo, population[random.randint(0, population_size // 2 - 1)])]
                cuckoo = np.clip(cuckoo, bounds[:, 0], bounds[:, 1])
                new_population.append(cuckoo)
            else:
                new_population.append(generate_random_solution(bounds))

        population = new_population

    population.sort(key=lambda x: evaluate(x))

    # Keep only non-dominated individuals (all objectives are minimized).
    pareto_front = []
    for ind in population:
        dominated = False
        to_remove = []
        for idx, existing in enumerate(pareto_front):
            # BUGFIX: the two dominance tests were swapped.  An existing
            # member that is at least as good on every objective dominates
            # the candidate (which must be discarded); the candidate removes
            # an existing member only when it is at least as good everywhere.
            if all(a <= b for a, b in zip(existing, ind)):
                dominated = True
                break
            elif all(a >= b for a, b in zip(existing, ind)):
                to_remove.append(idx)
        if not dominated:
            pareto_front = [existing for idx, existing in enumerate(pareto_front) if idx not in to_remove]
            pareto_front.append(ind)

    return pareto_front


if __name__ == "__main__":
    random.seed(42)
    np.random.seed(42)

    individual_length = len(index_array)  # Length of the individual
    bounds = np.array([[-5, 5]] * individual_length)  # Bounds for variables

    generations = 40
    population_size = 20
    pa = 0.25

    pareto_front = cuckoo_search_multiobjective(bounds, generations, population_size, pa)

    front = []
    for ind in pareto_front:
        # BUGFIX: evaluate() was called twice per individual (once to print,
        # once to store); each call rebuilds the text and re-scores it.
        objectives = evaluate(ind)
        print(objectives)
        front.append(objectives)

    end_time = time.time()  # Record the ending time
    elapsed_time = end_time - start_time  # Calculate the elapsed time
    print(f"Elapsed time: {elapsed_time:.2f} seconds")

    ref_point = [30.0, 20.0, 1.0]
    hypervolume = calculate_hypervolume(front, ref_point)
    print("Hypervolume:", hypervolume)
-------------------------------------------------------------------------------- /oruga_word2vec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ORUGA: Optimizing Readability Using Genetic Algorithms 4 | 5 | [Martinez-Gil2023a] J. Martinez-Gil, "Optimizing Readability Using Genetic Algorithms", arXiv preprint arXiv:2301.00374, 2023 6 | 7 | @author: Jorge Martinez-Gil 8 | """ 9 | 10 | #print(r.flesch_kincaid().score) 11 | #print(r.flesch().score) 12 | #print(r.gunning_fog()) 13 | #print(r.coleman_liau()) 14 | #print(r.dale_chall()) 15 | #print(r.ari()) 16 | #print(r.linsear_write()) 17 | #print(r.spache()) 18 | 19 | #Coding of individuals 20 | #-2, candidate but not synonym 21 | #-1, special character (if necessary) 22 | #0, not candidate 23 | #1, replaced by 1st option 24 | #2, replaced by 2nd option 25 | #N, replaced by Nth option 26 | 27 | # Modules 28 | import pygad 29 | import gensim 30 | import gensim.downloader 31 | import language_tool_python 32 | from readability import Readability 33 | google_news_vectors = gensim.downloader.load('word2vec-google-news-300') 34 | 35 | text_array = [] 36 | index_array = [] 37 | 38 | #text 39 | text = 'Austria emerged from the remnants of the Eastern and Hungarian March at the end of the first millennium. Originally a margraviate of Bavaria, it developed into a duchy of the Holy Roman Empire in 1156 and was later made an archduchy in 1453. In the 16th century, Vienna began serving as the empire administrative capital and Austria thus became the heartland of the Habsburg monarchy. After the dissolution of the Holy Roman Empire in 1806, Austria established its own empire, which became a great power and the dominant member of the German Confederation. The defeat in the Austro-Prussian War of 1866 led to the end of the Confederation and paved the way for the establishment of Austria-Hungary a year later.' 
40 | 41 | r = Readability(text) 42 | initial_score = r.flesch_kincaid().score 43 | 44 | #Creates a dictionary in order to store all the synonyms in main memory 45 | resource = text.split() 46 | Dict = {} 47 | for i in resource: 48 | if ',' in i: 49 | i = i.replace(',', '') 50 | if '.' in i: 51 | i = i.replace('.', '') 52 | 53 | if (not i[0].isupper() and len(i) > 3): 54 | if i in Dict.keys(): 55 | print ("Processing...Please wait") 56 | else: 57 | try: 58 | synonyms = google_news_vectors.most_similar(i, topn=6) 59 | except KeyError as e: 60 | print (e) 61 | synonyms = None 62 | if synonyms is not None: 63 | Dict[i] = [] 64 | Dict[i] = synonyms 65 | 66 | def listToString(s): 67 | str1 = "" 68 | for ele in s: 69 | str1 += str(ele) 70 | str1 += " " 71 | 72 | str1 = str1.replace(' ,', ',') 73 | str1 = str1.replace('_', ' ') 74 | return str1 75 | 76 | def correct_mistakes (text): 77 | my_tool = language_tool_python.LanguageTool('en-US') 78 | my_text = text 79 | my_matches = my_tool.check(my_text) 80 | 81 | myMistakes = [] 82 | myCorrections = [] 83 | startPositions = [] 84 | endPositions = [] 85 | 86 | # using the for-loop 87 | for rules in my_matches: 88 | if len(rules.replacements) > 0: 89 | startPositions.append(rules.offset) 90 | endPositions.append(rules.errorLength + rules.offset) 91 | myMistakes.append(my_text[rules.offset : rules.errorLength + rules.offset]) 92 | myCorrections.append(rules.replacements[0]) 93 | 94 | # creating new object 95 | my_NewText = list(my_text) 96 | 97 | # rewriting the correct passage 98 | for n in range(len(startPositions)): 99 | for i in range(len(my_text)): 100 | my_NewText[startPositions[n]] = myCorrections[n] 101 | if (i > startPositions[n] and i < endPositions[n]): 102 | my_NewText[i] = "" 103 | 104 | my_NewText = "".join(my_NewText) 105 | 106 | return my_NewText 107 | 108 | def Synonym(word, number): 109 | synonyms = [] 110 | 111 | if (Dict.get(word) is not None): 112 | synonyms = Dict.get(word) 113 | 114 | if (not synonyms): 115 
| return -2, word 116 | elif number >= len(synonyms): 117 | return len(synonyms)-1, synonyms[len(synonyms)-1][0] 118 | else: 119 | return int(number), synonyms[int(number-1)][0] 120 | 121 | def obtain_text (solution): 122 | res2 = text.split() 123 | text_converted = [] 124 | index=0 125 | for i in res2: 126 | if solution[index] < 1: 127 | text_converted.append (i) 128 | elif solution[index] >= 1: 129 | number, word = Synonym(i,solution[index]) 130 | text_converted.append (word.upper()) 131 | else: 132 | print ('Error') 133 | index += 1 134 | 135 | result = listToString(text_converted) 136 | return result 137 | 138 | def fitness_func(solution, solution_idx): 139 | 140 | #preprocessing 141 | a = 0 142 | for i in index_array: 143 | if index_array[a] <= 0: 144 | solution[a] = 0 145 | a += 1 146 | 147 | res2 = text.split() 148 | text_converted = [] 149 | index=0 150 | for i in res2: 151 | if solution[index] < 1: 152 | text_converted.append (i) 153 | elif solution[index] >= 1: 154 | number, word = Synonym(i,solution[index]) 155 | text_converted.append (word.upper()) 156 | else: 157 | print ('Error') 158 | index += 1 159 | 160 | result = listToString(text_converted) 161 | r = Readability(result) 162 | return r.flesch_kincaid().score * -1 163 | 164 | print (text) 165 | res = text.split() 166 | 167 | for i in res: 168 | flag = 0 169 | if ',' in i: 170 | i = i.replace(',', '') 171 | flag = 1 172 | if '.' 
in i: 173 | i = i.replace('.', '') 174 | flag = 2 175 | 176 | if (not i[0].isupper() and len(i) > 3 and i[-2:] != 'ed'): 177 | number, word = Synonym(i,6) 178 | text_array.append (word) 179 | index_array.append (number) 180 | else: 181 | text_array.append (i) 182 | index_array.append (0) 183 | 184 | if flag == 1: 185 | cad = str(text_array[-1]) 186 | text_array.pop() 187 | cad = cad + str(',') 188 | text_array.append (cad) 189 | flag = 0 190 | if flag == 2: 191 | cad = str(text_array[-1]) 192 | text_array.pop() 193 | cad = cad + str('.') 194 | text_array.append (cad) 195 | flag = 0 196 | 197 | newText = listToString(text_array) 198 | print(newText) 199 | print(index_array) 200 | 201 | # Parameters for the GA 202 | function_inputs = index_array 203 | num_generations = 100 # Number of generations 204 | num_parents_mating = 10 # Number of solutions to be selected as parents in the mating pool 205 | sol_per_pop = 20 # Number of solutions in the population 206 | num_genes = len(function_inputs) # Number of genes 207 | 208 | # Initialize the GA instance without the 'on_generation' argument 209 | ga_instance = pygad.GA(num_generations=1, # Set to 1 because we are controlling the generations manually 210 | num_parents_mating=num_parents_mating, 211 | sol_per_pop=sol_per_pop, 212 | num_genes=num_genes, 213 | fitness_func=fitness_func) 214 | 215 | last_fitness = 0 # Initialize last fitness for comparison 216 | 217 | # Manually iterate through generations 218 | for generation in range(num_generations): 219 | ga_instance.run() # Run GA for one generation 220 | 221 | # Getting the best solution after the current generation 222 | solution, solution_fitness, solution_idx = ga_instance.best_solution() 223 | 224 | print("Generation = {}".format(generation + 1)) 225 | print("Fitness = {}".format(solution_fitness)) 226 | print("Change = {}".format(solution_fitness - last_fitness)) 227 | 228 | last_fitness = solution_fitness # Update the last fitness value 229 | 230 | # At this point, 
the GA has completed all generations 231 | # You can directly get the best solution details without passing any arguments 232 | solution, solution_fitness, solution_idx = ga_instance.best_solution() 233 | print("Parameters of the best solution : {solution}".format(solution=solution)) 234 | print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) 235 | print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) 236 | 237 | new_text = correct_mistakes(obtain_text(solution)) 238 | rr = Readability(new_text) 239 | print (new_text) 240 | print ("Difference " + str(initial_score - rr.flesch_kincaid().score)) -------------------------------------------------------------------------------- /oruga_massive_experiments.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ORUGA: Optimizing Readability Using Genetic Algorithms 4 | 5 | [Martinez-Gil2023a] J. 
Martinez-Gil, "Optimizing Readability Using Genetic Algorithms", arXiv preprint arXiv:2301.00374, 2023 6 | 7 | @author: Jorge Martinez-Gil 8 | """ 9 | 10 | import csv 11 | import pygad 12 | import language_tool_python 13 | from readability import Readability 14 | from nltk.corpus import wordnet 15 | 16 | def main(): 17 | 18 | text_array = [] 19 | index_array = [] 20 | text = "" 21 | global last_fitness 22 | 23 | def listToString(s): 24 | str1 = "" 25 | for ele in s: 26 | str1 += str(ele) 27 | str1 += " " 28 | 29 | str1 = str1.replace(' ,', ',') 30 | str1 = str1.replace('_', ' ') 31 | return str1 32 | 33 | def Synonym(word, number): 34 | synonyms = [] 35 | for syn in wordnet.synsets(word): 36 | for lm in syn.lemmas(): 37 | synonyms.append(lm.name()) 38 | 39 | if (not synonyms): 40 | return -2, word 41 | elif number >= len(synonyms): 42 | return len(synonyms)-1, synonyms[len(synonyms)-1] 43 | else: 44 | return int(number), synonyms[int(number-1)] 45 | 46 | def obtain_text (solution): 47 | res2 = text.split() 48 | text_converted = [] 49 | index=0 50 | for i in res2: 51 | if solution[index] < 1: 52 | text_converted.append (i) 53 | elif solution[index] >= 1: 54 | number, word = Synonym(i,solution[index]) 55 | text_converted.append (word.upper()) 56 | else: 57 | print ('Error') 58 | index += 1 59 | 60 | result = listToString(text_converted) 61 | return result 62 | 63 | def correct_mistakes (text): 64 | my_tool = language_tool_python.LanguageTool('en-US') 65 | my_text = text 66 | my_matches = my_tool.check(my_text) 67 | 68 | myMistakes = [] 69 | myCorrections = [] 70 | startPositions = [] 71 | endPositions = [] 72 | 73 | # using the for-loop 74 | for rules in my_matches: 75 | if len(rules.replacements) > 0: 76 | startPositions.append(rules.offset) 77 | endPositions.append(rules.errorLength + rules.offset) 78 | myMistakes.append(my_text[rules.offset : rules.errorLength + rules.offset]) 79 | myCorrections.append(rules.replacements[0]) 80 | 81 | # creating new object 82 | 
my_NewText = list(my_text) 83 | 84 | # rewriting the correct passage 85 | for n in range(len(startPositions)): 86 | for i in range(len(my_text)): 87 | my_NewText[startPositions[n]] = myCorrections[n] 88 | if (i > startPositions[n] and i < endPositions[n]): 89 | my_NewText[i] = "" 90 | 91 | my_NewText = "".join(my_NewText) 92 | 93 | return my_NewText 94 | 95 | 96 | def fitness_func(solution, solution_idx): 97 | 98 | #preprocessing 99 | a = 0 100 | for i in index_array: 101 | if index_array[a] <= 0: 102 | solution[a] = 0 103 | a += 1 104 | 105 | res2 = text.split() 106 | text_converted = [] 107 | index=0 108 | for i in res2: 109 | if solution[index] < 1: 110 | text_converted.append (i) 111 | elif solution[index] >= 1: 112 | number, word = Synonym(i,solution[index]) 113 | text_converted.append (word) 114 | else: 115 | print ('Error') 116 | index += 1 117 | 118 | result = listToString(text_converted) 119 | r = Readability(result) 120 | return r.flesch_kincaid().score * -1 121 | 122 | def on_generation(ga_instance): 123 | 124 | print("Generation = {generation}".format(generation=ga_instance.generations_completed)) 125 | print("Fitness = {fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1])) 126 | print("Change = {change}".format(change=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness)) 127 | ast_fitness = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] 128 | 129 | with open('texts.txt', 'r') as fd: 130 | reader = csv.reader(fd) 131 | for row in reader: 132 | text = ''.join(row) 133 | print (text) 134 | r = Readability(text) 135 | initial_score = r.flesch_kincaid().score 136 | res = text.split() 137 | 138 | for i in res: 139 | flag = 0 140 | if ',' in i: 141 | i = i.replace(',', '') 142 | flag = 1 143 | if '.' 
in i: 144 | i = i.replace('.', '') 145 | flag = 2 146 | 147 | if (not i[0].isupper() and len(i) > 3): 148 | number, word = Synonym(i,6) 149 | text_array.append (word) 150 | index_array.append (number) 151 | else: 152 | text_array.append (i) 153 | index_array.append (0) 154 | 155 | if flag == 1: 156 | cad = text_array[-1] 157 | text_array.pop() 158 | cad = cad + str(',') 159 | text_array.append (cad) 160 | flag = 0 161 | if flag == 2: 162 | cad = text_array[-1] 163 | text_array.pop() 164 | cad = cad + str('.') 165 | text_array.append (cad) 166 | flag = 0 167 | 168 | newText = listToString(text_array) 169 | #print(newText) 170 | print(index_array) 171 | 172 | # Parameters for the GA 173 | function_inputs = index_array 174 | num_generations = 100 # Number of generations 175 | num_parents_mating = 10 # Number of solutions to be selected as parents in the mating pool 176 | sol_per_pop = 20 # Number of solutions in the population 177 | num_genes = len(function_inputs) # Number of genes 178 | 179 | # Initialize the GA instance without the 'on_generation' argument 180 | ga_instance = pygad.GA(num_generations=1, # Set to 1 because we are controlling the generations manually 181 | num_parents_mating=num_parents_mating, 182 | sol_per_pop=sol_per_pop, 183 | num_genes=num_genes, 184 | fitness_func=fitness_func) 185 | 186 | last_fitness = 0 # Initialize last fitness for comparison 187 | 188 | # Manually iterate through generations 189 | for generation in range(num_generations): 190 | ga_instance.run() # Run GA for one generation 191 | 192 | # Getting the best solution after the current generation 193 | solution, solution_fitness, solution_idx = ga_instance.best_solution() 194 | 195 | print("Generation = {}".format(generation + 1)) 196 | print("Fitness = {}".format(solution_fitness)) 197 | print("Change = {}".format(solution_fitness - last_fitness)) 198 | 199 | last_fitness = solution_fitness # Update the last fitness value 200 | 201 | # At this point, the GA has completed all 
generations 202 | # You can directly get the best solution details without passing any arguments 203 | solution, solution_fitness, solution_idx = ga_instance.best_solution() 204 | print("Parameters of the best solution : {solution}".format(solution=solution)) 205 | print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) 206 | print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) 207 | 208 | new_text = correct_mistakes(obtain_text(solution)) 209 | rr = Readability(new_text) 210 | 211 | with open('results.txt', 'a') as the_file: 212 | the_file.write("Difference " + str(initial_score - rr.flesch_kincaid().score) + str('\n')) 213 | 214 | 215 | 216 | if __name__ == "__main__": 217 | for x in range(10): 218 | main() -------------------------------------------------------------------------------- /comparison/web/cs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from readability import Readability 3 | from nltk.corpus import wordnet 4 | import random 5 | import gensim.downloader as api 6 | import gensim.downloader 7 | import requests 8 | import time 9 | 10 | google_news_vectors = gensim.downloader.load('word2vec-google-news-300') 11 | model = api.load('word2vec-google-news-300') 12 | Dict = {} 13 | 14 | # Your existing imports here 15 | def calculate_hypervolume(pareto_front, ref_point): 16 | sorted_front = sorted(pareto_front, key=lambda x: x[0]) 17 | hypervolume = 0.0 18 | prev_point = [0.0, ref_point[1]] 19 | 20 | for point in sorted_front: 21 | if point[1] < prev_point[1]: 22 | hypervolume += (prev_point[0] - point[0]) * (prev_point[1] - ref_point[1]) 23 | prev_point = point 24 | 25 | hypervolume += (prev_point[0] - ref_point[0]) * (prev_point[1] - ref_point[1]) 26 | return hypervolume 27 | 28 | 29 | 30 | def listToString(s): 31 | str1 = "" 32 | for ele in s: 33 | str1 += str(ele) 34 | str1 += " " 35 | 36 | str1 = 
str1.replace(' ,', ',') 37 | str1 = str1.replace('_', ' ') 38 | return str1 39 | 40 | def Synonym(word, number): 41 | synonyms = [] 42 | 43 | if (Dict.get(word) is not None): 44 | synonyms = Dict.get(word) 45 | 46 | if (not synonyms): 47 | return -2, word 48 | elif number >= len(synonyms): 49 | return len(synonyms)-1, synonyms[len(synonyms)-1][0] 50 | else: 51 | return int(number), synonyms[int(number-1)][0] 52 | 53 | def fitness_func1(solution): 54 | print (solution) 55 | 56 | #preprocessing 57 | a = 0 58 | for i in index_array: 59 | if index_array[a] <= 0: 60 | solution[a] = 0 61 | a += 1 62 | 63 | res2 = text.split() 64 | text_converted = [] 65 | index=0 66 | for i in res2: 67 | if solution[index] < 1: 68 | text_converted.append (i) 69 | elif solution[index] >= 1: 70 | number, word = Synonym(i,solution[index]) 71 | text_converted.append (word) 72 | else: 73 | print ('Error') 74 | index += 1 75 | 76 | result = listToString(text_converted) 77 | r = Readability(result) 78 | return r.ari().score 79 | 80 | text = 'The sea moderates the climate and has important roles in the water cycle, carbon cycle, and nitrogen cycle. Humans harnessing and studying the sea have been recorded since ancient times, and evidenced well into prehistory, while its modern scientific study is called oceanography. The most abundant solid dissolved in seawater is sodium chloride. The water also contains salts of magnesium, calcium, potassium, and mercury, amongst many other elements, some in minute concentrations. Salinity varies widely, being lower near the surface and the mouths of large rivers and higher in the depths of the ocean; however, the relative proportions of dissolved salts vary little across the oceans.' 81 | 82 | #Creates a dictionary in order to store all the synonyms in main memory 83 | resource = text.split() 84 | Dict = {} 85 | for i in resource: 86 | if ',' in i: 87 | i = i.replace(',', '') 88 | if '.' 
in i: 89 | i = i.replace('.', '') 90 | 91 | if i in Dict.keys(): 92 | print ("Processing...Please wait") 93 | else: 94 | if (not i[0].isupper() and len(i) > 3): 95 | str1 = 'https://tuna.thesaurus.com/pageData/' + str(i) 96 | req = requests.get(str1) 97 | try: 98 | dict_synonyms = req.json()['data']['definitionData']['definitions'][0]['synonyms'] 99 | except TypeError as e: 100 | print ("Processing...Please wait") 101 | dict_synonyms = None 102 | 103 | if dict_synonyms is not None: 104 | synonyms = [r["term"] for r in dict_synonyms] 105 | if synonyms: 106 | Dict[i] = [] 107 | Dict[i] = synonyms 108 | 109 | text_array = [] 110 | index_array = [] 111 | 112 | res = text.split() 113 | for i in res: 114 | flag = 0 115 | if ',' in i: 116 | i = i.replace(',', '') 117 | flag = 1 118 | if '.' in i: 119 | i = i.replace('.', '') 120 | flag = 2 121 | 122 | if (not i[0].isupper() and len(i) > 3): 123 | number, word = Synonym(i,6) 124 | text_array.append (word) 125 | index_array.append (number) 126 | else: 127 | text_array.append (i) 128 | index_array.append (0) 129 | 130 | if flag == 1: 131 | cad = text_array[-1] 132 | text_array.pop() 133 | cad = cad + str(',') 134 | text_array.append (cad) 135 | flag = 0 136 | if flag == 2: 137 | cad = text_array[-1] 138 | text_array.pop() 139 | cad = cad + str('.') 140 | text_array.append (cad) 141 | flag = 0 142 | 143 | def obtain_text (solution): 144 | res2 = text.split() 145 | text_converted = [] 146 | index=0 147 | for i in res2: 148 | if solution[index] < 1: 149 | text_converted.append (i) 150 | elif solution[index] >= 1: 151 | number, word = Synonym(i,solution[index]) 152 | text_converted.append (word.upper()) 153 | else: 154 | print ('Error') 155 | index += 1 156 | 157 | result = listToString(text_converted) 158 | return result 159 | 160 | # Define the multi-objective optimization problem 161 | def evaluate(x): 162 | source = text 163 | target = obtain_text(x) 164 | 165 | return len([1 for i in x if i >= 1]), fitness_func1(x), float 
(model.wmdistance(source, target)) 166 | 167 | def generate_random_solution(bounds): 168 | return [random.uniform(bounds[i][0], bounds[i][1]) for i in range(len(bounds))] 169 | 170 | def levy_flight(beta): 171 | return np.power((1.0 / np.random.gamma(1.0 + beta)), 1.0 / beta) 172 | 173 | def cuckoo_search_multiobjective(bounds, generations, population_size, pa): 174 | dim = len(bounds) 175 | population = [generate_random_solution(bounds) for _ in range(population_size)] 176 | 177 | for gen in range(generations): 178 | population.sort(key=lambda x: evaluate(x)) 179 | new_population = population[:population_size//2] 180 | 181 | for _ in range(population_size - population_size//2): 182 | if random.random() < pa: 183 | selected_cuckoo = random.choice(new_population) 184 | cuckoo = [x + levy_flight(1.5) * (x - y) for x, y in zip(selected_cuckoo, population[random.randint(0, population_size//2-1)])] 185 | cuckoo = np.clip(cuckoo, bounds[:, 0], bounds[:, 1]) 186 | new_population.append(cuckoo) 187 | else: 188 | new_population.append(generate_random_solution(bounds)) 189 | 190 | population = new_population 191 | 192 | population.sort(key=lambda x: evaluate(x)) 193 | 194 | pareto_front = [] 195 | for ind in population: 196 | dominated = False 197 | to_remove = [] 198 | for idx, existing in enumerate(pareto_front): 199 | if all(a <= b for a, b in zip(existing, ind)): 200 | to_remove.append(idx) 201 | elif all(a >= b for a, b in zip(existing, ind)): 202 | dominated = True 203 | break 204 | if not dominated: 205 | pareto_front = [existing for idx, existing in enumerate(pareto_front) if idx not in to_remove] 206 | pareto_front.append(ind) 207 | 208 | return pareto_front 209 | 210 | 211 | if __name__ == "__main__": 212 | 213 | start_time = time.time() # Record the starting time 214 | random.seed(42) 215 | np.random.seed(42) 216 | 217 | individual_length = len(index_array) # Length of the individual (example value) 218 | bounds = np.array([[-5, 5]] * individual_length) # Example 
bounds for variables 219 | 220 | generations = 40 221 | population_size = 20 222 | pa = 0.25 223 | 224 | pareto_front = cuckoo_search_multiobjective(bounds, generations, population_size, pa) 225 | 226 | front = [] 227 | for ind in pareto_front: 228 | print (evaluate(ind)) 229 | front.append (evaluate(ind)) 230 | end_time = time.time() # Record the ending time 231 | elapsed_time = end_time - start_time # Calculate the elapsed time 232 | print(f"Elapsed time: {elapsed_time:.2f} seconds") 233 | 234 | ref_point = [30.0, 20.0, 1.0] 235 | hypervolume = calculate_hypervolume(front, ref_point) 236 | print("Hypervolume:", hypervolume) 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | -------------------------------------------------------------------------------- /oruga_webscraping.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ORUGA: Optimizing Readability Using Genetic Algorithms 4 | 5 | [Martinez-Gil2023a] J. 
Martinez-Gil, "Optimizing Readability Using Genetic Algorithms", arXiv preprint arXiv:2301.00374, 2023 6 | 7 | @author: Jorge Martinez-Gil 8 | """ 9 | 10 | # Modules 11 | import pygad 12 | import requests 13 | import language_tool_python 14 | from readability import Readability 15 | 16 | #print(r.flesch_kincaid().score) 17 | #print(r.flesch().score) 18 | #print(r.gunning_fog()) 19 | #print(r.coleman_liau()) 20 | #print(r.dale_chall()) 21 | #print(r.ari()) 22 | #print(r.linsear_write()) 23 | #print(r.spache()) 24 | 25 | #Coding of individuals 26 | #-2, candidate but not synonym 27 | #-1, special character (if necessary) 28 | #0, not candidate 29 | #1, replaced by 1st option 30 | #2, replaced by 2nd option 31 | #N, replaced by Nth option 32 | 33 | text_array = [] 34 | index_array = [] 35 | 36 | #text 37 | text = 'Austria emerged from the remnants of the Eastern and Hungarian March at the end of the first millennium. Originally a margraviate of Bavaria, it developed into a duchy of the Holy Roman Empire in 1156 and was later made an archduchy in 1453. In the 16th century, Vienna began serving as the empire administrative capital and Austria thus became the heartland of the Habsburg monarchy. After the dissolution of the Holy Roman Empire in 1806, Austria established its own empire, which became a great power and the dominant member of the German Confederation. The defeat in the Austro-Prussian War of 1866 led to the end of the Confederation and paved the way for the establishment of Austria-Hungary a year later.' 38 | 39 | r = Readability(text) 40 | initial_score = r.flesch_kincaid().score 41 | 42 | #Creates a dictionary in order to store all the synonyms in main memory 43 | resource = text.split() 44 | Dict = {} 45 | for i in resource: 46 | if ',' in i: 47 | i = i.replace(',', '') 48 | if '.' 
in i: 49 | i = i.replace('.', '') 50 | 51 | if i in Dict.keys(): 52 | print ("Processing...Please wait") 53 | else: 54 | if (not i[0].isupper() and len(i) > 3): 55 | str1 = 'https://tuna.thesaurus.com/pageData/' + str(i) 56 | req = requests.get(str1) 57 | try: 58 | dict_synonyms = req.json()['data']['definitionData']['definitions'][0]['synonyms'] 59 | except TypeError as e: 60 | print ("Processing...Please wait") 61 | dict_synonyms = None 62 | 63 | if dict_synonyms is not None: 64 | synonyms = [r["term"] for r in dict_synonyms] 65 | if synonyms: 66 | Dict[i] = [] 67 | Dict[i] = synonyms 68 | 69 | 70 | def listToString(s): 71 | str1 = "" 72 | for ele in s: 73 | str1 += str(ele) 74 | str1 += " " 75 | 76 | str1 = str1.replace(' ,', ',') 77 | str1 = str1.replace('_', ' ') 78 | return str1 79 | 80 | def correct_mistakes (text): 81 | my_tool = language_tool_python.LanguageTool('en-US') 82 | my_text = text 83 | my_matches = my_tool.check(my_text) 84 | 85 | myMistakes = [] 86 | myCorrections = [] 87 | startPositions = [] 88 | endPositions = [] 89 | 90 | # using the for-loop 91 | for rules in my_matches: 92 | if len(rules.replacements) > 0: 93 | startPositions.append(rules.offset) 94 | endPositions.append(rules.errorLength + rules.offset) 95 | myMistakes.append(my_text[rules.offset : rules.errorLength + rules.offset]) 96 | myCorrections.append(rules.replacements[0]) 97 | 98 | # creating new object 99 | my_NewText = list(my_text) 100 | 101 | # rewriting the correct passage 102 | for n in range(len(startPositions)): 103 | for i in range(len(my_text)): 104 | my_NewText[startPositions[n]] = myCorrections[n] 105 | if (i > startPositions[n] and i < endPositions[n]): 106 | my_NewText[i] = "" 107 | 108 | my_NewText = "".join(my_NewText) 109 | 110 | return my_NewText 111 | 112 | def Synonym(word, number): 113 | synonyms = [] 114 | 115 | if (Dict.get(word) is not None): 116 | synonyms = Dict.get(word) 117 | 118 | if (not synonyms): 119 | return -2, word 120 | elif number >= 
len(synonyms): 121 | return len(synonyms)-1, synonyms[len(synonyms)-1] 122 | else: 123 | return int(number), synonyms[int(number-1)] 124 | 125 | def obtain_text (solution): 126 | res2 = text.split() 127 | text_converted = [] 128 | index=0 129 | for i in res2: 130 | if solution[index] < 1: 131 | text_converted.append (i) 132 | elif solution[index] >= 1: 133 | number, word = Synonym(i,solution[index]) 134 | text_converted.append (word.upper()) 135 | else: 136 | print ('Error') 137 | index += 1 138 | 139 | result = listToString(text_converted) 140 | return result 141 | 142 | def fitness_func(solution, solution_idx): 143 | 144 | #preprocessing 145 | a = 0 146 | for i in index_array: 147 | if index_array[a] <= 0: 148 | solution[a] = 0 149 | a += 1 150 | 151 | res2 = text.split() 152 | text_converted = [] 153 | index=0 154 | for i in res2: 155 | if solution[index] < 1: 156 | text_converted.append (i) 157 | elif solution[index] >= 1: 158 | number, word = Synonym(i,solution[index]) 159 | text_converted.append (word.upper()) 160 | else: 161 | print ('Error') 162 | index += 1 163 | 164 | result = listToString(text_converted) 165 | r = Readability(result) 166 | return r.flesch_kincaid().score * -1 167 | 168 | print (text) 169 | res = text.split() 170 | 171 | for i in res: 172 | flag = 0 173 | if ',' in i: 174 | i = i.replace(',', '') 175 | flag = 1 176 | if '.' 
in i: 177 | i = i.replace('.', '') 178 | flag = 2 179 | 180 | if (not i[0].isupper() and len(i) > 3): 181 | number, word = Synonym(i,6) 182 | text_array.append (word) 183 | index_array.append (number) 184 | else: 185 | text_array.append (i) 186 | index_array.append (0) 187 | 188 | if flag == 1: 189 | cad = str(text_array[-1]) 190 | text_array.pop() 191 | cad = cad + str(',') 192 | text_array.append (cad) 193 | flag = 0 194 | if flag == 2: 195 | cad = str(text_array[-1]) 196 | text_array.pop() 197 | cad = cad + str('.') 198 | text_array.append (cad) 199 | flag = 0 200 | 201 | newText = listToString(text_array) 202 | print(newText) 203 | print(index_array) 204 | 205 | # Parameters for the GA 206 | function_inputs = index_array 207 | num_generations = 100 # Number of generations 208 | num_parents_mating = 10 # Number of solutions to be selected as parents in the mating pool 209 | sol_per_pop = 20 # Number of solutions in the population 210 | num_genes = len(function_inputs) # Number of genes 211 | 212 | # Initialize the GA instance without the 'on_generation' argument 213 | ga_instance = pygad.GA(num_generations=1, # Set to 1 because we are controlling the generations manually 214 | num_parents_mating=num_parents_mating, 215 | sol_per_pop=sol_per_pop, 216 | num_genes=num_genes, 217 | fitness_func=fitness_func) 218 | 219 | last_fitness = 0 # Initialize last fitness for comparison 220 | 221 | # Manually iterate through generations 222 | for generation in range(num_generations): 223 | ga_instance.run() # Run GA for one generation 224 | 225 | # Getting the best solution after the current generation 226 | solution, solution_fitness, solution_idx = ga_instance.best_solution() 227 | 228 | print("Generation = {}".format(generation + 1)) 229 | print("Fitness = {}".format(solution_fitness)) 230 | print("Change = {}".format(solution_fitness - last_fitness)) 231 | 232 | last_fitness = solution_fitness # Update the last fitness value 233 | 234 | # At this point, the GA has 
completed all generations 235 | # You can directly get the best solution details without passing any arguments 236 | solution, solution_fitness, solution_idx = ga_instance.best_solution() 237 | print("Parameters of the best solution : {solution}".format(solution=solution)) 238 | print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) 239 | print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) 240 | 241 | new_text = correct_mistakes(obtain_text(solution)) 242 | rr = Readability(new_text) 243 | print (new_text) 244 | print ("Difference " + str(initial_score - rr.flesch_kincaid().score)) -------------------------------------------------------------------------------- /oruga_massive_experiments_smog.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ORUGA: Optimizing Readability Using Genetic Algorithms 4 | 5 | [Martinez-Gil2023a] J. Martinez-Gil, "Optimizing Readability Using Genetic Algorithms", arXiv preprint arXiv:2301.00374, 2023 6 | 7 | @author: Jorge Martinez-Gil 8 | """ 9 | 10 | import csv 11 | import pygad 12 | import language_tool_python 13 | from readability import Readability 14 | from nltk.corpus import wordnet 15 | from nltk.tokenize import sent_tokenize 16 | 17 | def main(): 18 | 19 | text_array = [] 20 | index_array = [] 21 | text = "" 22 | global last_fitness 23 | 24 | def listToString(s): 25 | str1 = "" 26 | for ele in s: 27 | str1 += str(ele) 28 | str1 += " " 29 | 30 | str1 = str1.replace(' ,', ',') 31 | str1 = str1.replace('_', ' ') 32 | return str1 33 | 34 | def Synonym(word, number): 35 | synonyms = [] 36 | for syn in wordnet.synsets(word): 37 | for lm in syn.lemmas(): 38 | synonyms.append(lm.name()) 39 | 40 | if (not synonyms): 41 | return -2, word 42 | elif number >= len(synonyms): 43 | return len(synonyms)-1, synonyms[len(synonyms)-1] 44 | else: 45 | return int(number), 
synonyms[int(number-1)] 46 | 47 | def obtain_text (solution): 48 | res2 = text.split() 49 | text_converted = [] 50 | index=0 51 | for i in res2: 52 | if solution[index] < 1: 53 | text_converted.append (i) 54 | elif solution[index] >= 1: 55 | number, word = Synonym(i,solution[index]) 56 | text_converted.append (word.upper()) 57 | else: 58 | print ('Error') 59 | index += 1 60 | 61 | result = listToString(text_converted) 62 | return result 63 | 64 | def correct_mistakes (text): 65 | my_tool = language_tool_python.LanguageTool('en-US') 66 | my_text = text 67 | my_matches = my_tool.check(my_text) 68 | 69 | myMistakes = [] 70 | myCorrections = [] 71 | startPositions = [] 72 | endPositions = [] 73 | 74 | # using the for-loop 75 | for rules in my_matches: 76 | if len(rules.replacements) > 0: 77 | startPositions.append(rules.offset) 78 | endPositions.append(rules.errorLength + rules.offset) 79 | myMistakes.append(my_text[rules.offset : rules.errorLength + rules.offset]) 80 | myCorrections.append(rules.replacements[0]) 81 | 82 | # creating new object 83 | my_NewText = list(my_text) 84 | 85 | # rewriting the correct passage 86 | for n in range(len(startPositions)): 87 | for i in range(len(my_text)): 88 | my_NewText[startPositions[n]] = myCorrections[n] 89 | if (i > startPositions[n] and i < endPositions[n]): 90 | my_NewText[i] = "" 91 | 92 | my_NewText = "".join(my_NewText) 93 | 94 | return my_NewText 95 | 96 | 97 | def fitness_func(solution, solution_idx): 98 | 99 | #preprocessing 100 | a = 0 101 | for i in index_array: 102 | if index_array[a] <= 0: 103 | solution[a] = 0 104 | a += 1 105 | 106 | res2 = text.split() 107 | text_converted = [] 108 | index=0 109 | for i in res2: 110 | if solution[index] < 1: 111 | text_converted.append (i) 112 | elif solution[index] >= 1: 113 | number, word = Synonym(i,solution[index]) 114 | text_converted.append (word) 115 | else: 116 | print ('Error') 117 | index += 1 118 | 119 | result = listToString(text_converted) 120 | r = 
Readability(result) 121 | return r.smog().score * -1 122 | 123 | def on_generation(ga_instance): 124 | 125 | print("Generation = {generation}".format(generation=ga_instance.generations_completed)) 126 | print("Fitness = {fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1])) 127 | print("Change = {change}".format(change=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness)) 128 | ast_fitness = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] 129 | 130 | with open('texts.txt', 'r') as fd: 131 | reader = csv.reader(fd) 132 | for row in reader: 133 | text = ''.join(row) 134 | sentences = sent_tokenize(text) 135 | while (len(sentences) < 30): 136 | sentences = sentences + sentences 137 | text = ''.join(sentences) 138 | text = text.replace('.', '. ') 139 | print (text) 140 | print (len(sent_tokenize(text))) 141 | r = Readability(text) 142 | initial_score = r.smog().score 143 | res = text.split() 144 | 145 | for i in res: 146 | flag = 0 147 | if ',' in i: 148 | i = i.replace(',', '') 149 | flag = 1 150 | if '.' 
in i: 151 | i = i.replace('.', '') 152 | flag = 2 153 | 154 | if (not i[0].isupper() and len(i) > 3): 155 | number, word = Synonym(i,6) 156 | text_array.append (word) 157 | index_array.append (number) 158 | else: 159 | text_array.append (i) 160 | index_array.append (0) 161 | 162 | if flag == 1: 163 | cad = text_array[-1] 164 | text_array.pop() 165 | cad = cad + str(',') 166 | text_array.append (cad) 167 | flag = 0 168 | if flag == 2: 169 | cad = text_array[-1] 170 | text_array.pop() 171 | cad = cad + str('.') 172 | text_array.append (cad) 173 | flag = 0 174 | 175 | newText = listToString(text_array) 176 | #print(newText) 177 | print(index_array) 178 | 179 | # Parameters for the GA 180 | function_inputs = index_array 181 | num_generations = 100 # Number of generations 182 | num_parents_mating = 10 # Number of solutions to be selected as parents in the mating pool 183 | sol_per_pop = 20 # Number of solutions in the population 184 | num_genes = len(function_inputs) # Number of genes 185 | 186 | # Initialize the GA instance without the 'on_generation' argument 187 | ga_instance = pygad.GA(num_generations=1, # Set to 1 because we are controlling the generations manually 188 | num_parents_mating=num_parents_mating, 189 | sol_per_pop=sol_per_pop, 190 | num_genes=num_genes, 191 | fitness_func=fitness_func) 192 | 193 | last_fitness = 0 # Initialize last fitness for comparison 194 | 195 | # Manually iterate through generations 196 | for generation in range(num_generations): 197 | ga_instance.run() # Run GA for one generation 198 | 199 | # Getting the best solution after the current generation 200 | solution, solution_fitness, solution_idx = ga_instance.best_solution() 201 | 202 | print("Generation = {}".format(generation + 1)) 203 | print("Fitness = {}".format(solution_fitness)) 204 | print("Change = {}".format(solution_fitness - last_fitness)) 205 | 206 | last_fitness = solution_fitness # Update the last fitness value 207 | 208 | # At this point, the GA has completed all 
generations 209 | # You can directly get the best solution details without passing any arguments 210 | solution, solution_fitness, solution_idx = ga_instance.best_solution() 211 | print("Parameters of the best solution : {solution}".format(solution=solution)) 212 | print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) 213 | print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) 214 | 215 | new_text = correct_mistakes(obtain_text(solution)) 216 | rr = Readability(new_text) 217 | 218 | with open('results.txt', 'a') as the_file: 219 | the_file.write("Difference " + str(initial_score - rr.smog().score) + str('\n')) 220 | 221 | 222 | 223 | if __name__ == "__main__": 224 | for x in range(10): 225 | main() -------------------------------------------------------------------------------- /comparison/word2vec/tlbo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from readability import Readability 3 | from nltk.corpus import wordnet 4 | import random 5 | import nltk 6 | from deap import base, creator, tools 7 | import gensim.downloader as api 8 | import gensim.downloader 9 | import time 10 | start_time = time.time() # Record the starting time 11 | 12 | model = api.load('word2vec-google-news-300') 13 | google_news_vectors = gensim.downloader.load('word2vec-google-news-300') 14 | 15 | def calculate_hypervolume(pareto_front, ref_point): 16 | sorted_front = sorted(pareto_front, key=lambda x: x[0]) 17 | hypervolume = 0.0 18 | prev_point = [0.0, ref_point[1]] 19 | 20 | for point in sorted_front: 21 | if point[1] < prev_point[1]: 22 | hypervolume += (prev_point[0] - point[0]) * (prev_point[1] - ref_point[1]) 23 | prev_point = point 24 | 25 | hypervolume += (prev_point[0] - ref_point[0]) * (prev_point[1] - ref_point[1]) 26 | return hypervolume 27 | 28 | def listToString(s): 29 | str1 = "" 30 | for ele in s: 31 | str1 += str(ele) 32 | 
str1 += " " 33 | 34 | str1 = str1.replace(' ,', ',') 35 | str1 = str1.replace('_', ' ') 36 | return str1 37 | 38 | def Synonym(word, number): 39 | synonyms = [] 40 | 41 | if (Dict.get(word) is not None): 42 | synonyms = Dict.get(word) 43 | 44 | if (not synonyms): 45 | return -2, word 46 | elif number >= len(synonyms): 47 | return len(synonyms)-1, synonyms[len(synonyms)-1][0] 48 | else: 49 | return int(number), synonyms[int(number-1)][0] 50 | 51 | def fitness_func1(solution): 52 | print (solution) 53 | 54 | #preprocessing 55 | a = 0 56 | for i in index_array: 57 | if index_array[a] <= 0: 58 | solution[a] = 0 59 | a += 1 60 | 61 | res2 = text.split() 62 | text_converted = [] 63 | index=0 64 | for i in res2: 65 | if solution[index] < 1: 66 | text_converted.append (i) 67 | elif solution[index] >= 1: 68 | number, word = Synonym(i,solution[index]) 69 | text_converted.append (word) 70 | else: 71 | print ('Error') 72 | index += 1 73 | 74 | result = listToString(text_converted) 75 | r = Readability(result) 76 | return r.ari().score 77 | 78 | text = 'The sea moderates the climate and has important roles in the water cycle, carbon cycle, and nitrogen cycle. Humans harnessing and studying the sea have been recorded since ancient times, and evidenced well into prehistory, while its modern scientific study is called oceanography. The most abundant solid dissolved in seawater is sodium chloride. The water also contains salts of magnesium, calcium, potassium, and mercury, amongst many other elements, some in minute concentrations. Salinity varies widely, being lower near the surface and the mouths of large rivers and higher in the depths of the ocean; however, the relative proportions of dissolved salts vary little across the oceans.' 79 | 80 | #Creates a dictionary in order to store all the synonyms in main memory 81 | resource = text.split() 82 | Dict = {} 83 | for i in resource: 84 | if ',' in i: 85 | i = i.replace(',', '') 86 | if '.' 
in i: 87 | i = i.replace('.', '') 88 | 89 | if (not i[0].isupper() and len(i) > 3): 90 | if i in Dict.keys(): 91 | print ("Processing...Please wait") 92 | else: 93 | try: 94 | synonyms = google_news_vectors.most_similar(i, topn=6) 95 | except KeyError as e: 96 | print (e) 97 | synonyms = None 98 | if synonyms is not None: 99 | Dict[i] = [] 100 | Dict[i] = synonyms 101 | 102 | text_array = [] 103 | index_array = [] 104 | 105 | res = text.split() 106 | for i in res: 107 | flag = 0 108 | if ',' in i: 109 | i = i.replace(',', '') 110 | flag = 1 111 | if '.' in i: 112 | i = i.replace('.', '') 113 | flag = 2 114 | 115 | if (not i[0].isupper() and len(i) > 3): 116 | number, word = Synonym(i,6) 117 | text_array.append (word) 118 | index_array.append (number) 119 | else: 120 | text_array.append (i) 121 | index_array.append (0) 122 | 123 | if flag == 1: 124 | cad = text_array[-1] 125 | text_array.pop() 126 | cad = cad + str(',') 127 | text_array.append (cad) 128 | flag = 0 129 | if flag == 2: 130 | cad = text_array[-1] 131 | text_array.pop() 132 | cad = cad + str('.') 133 | text_array.append (cad) 134 | flag = 0 135 | 136 | def obtain_text (solution): 137 | res2 = text.split() 138 | text_converted = [] 139 | index=0 140 | for i in res2: 141 | if solution[index] < 1: 142 | text_converted.append (i) 143 | elif solution[index] >= 1: 144 | number, word = Synonym(i,solution[index]) 145 | text_converted.append (word.upper()) 146 | else: 147 | print ('Error') 148 | index += 1 149 | 150 | result = listToString(text_converted) 151 | return result 152 | 153 | # Define your problem's objective function 154 | def objective_function(x): 155 | 156 | source = text 157 | target = obtain_text(x) 158 | 159 | return [len([1 for i in x if i >= 1]), fitness_func1(x), float (model.wmdistance(source, target))] 160 | 161 | # TLBO parameters 162 | population_size = 20 163 | max_generations = 50 164 | dimension = len(index_array) 165 | lower_bound = -4 166 | upper_bound = 4 167 | 168 | # DEAP 
initialization 169 | creator.create("FitnessMin", base.Fitness, weights=(-1.0, -1.0, -1.0)) 170 | creator.create("Individual", list, fitness=creator.FitnessMin) 171 | 172 | toolbox = base.Toolbox() 173 | toolbox.register("attr_float", random.uniform, lower_bound, upper_bound) 174 | toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, n=dimension) 175 | toolbox.register("population", tools.initRepeat, list, toolbox.individual) 176 | 177 | toolbox.register("evaluate", objective_function) 178 | 179 | def teaching_phase(learners, mean_teacher): 180 | for learner in learners: 181 | diff = [mean_teacher[dim] - learner[dim] for dim in range(dimension)] 182 | random_values = [random.random() for _ in range(dimension)] 183 | update_vector = [random_value * diff[dim] for dim, random_value in enumerate(random_values)] 184 | learner[:] = [learner[dim] + update_vector[dim] for dim in range(dimension)] 185 | 186 | toolbox.register("mate", tools.cxBlend, alpha=0.5) 187 | toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.1) 188 | toolbox.register("select", tools.selBest) 189 | 190 | 191 | def main(): 192 | population = toolbox.population(n=population_size) 193 | 194 | # Attach fitness values to the initial population 195 | for ind in population: 196 | ind.fitness.values = toolbox.evaluate(ind) 197 | 198 | pareto_front = [] # Initialize Pareto front 199 | 200 | # TLBO main loop 201 | for generation in range(max_generations): 202 | teachers = toolbox.select(population, k=5) 203 | mean_teacher = [sum(teacher[dim] for teacher in teachers) / len(teachers) for dim in range(dimension)] 204 | 205 | learners = population[:] 206 | teaching_phase(learners, mean_teacher) 207 | 208 | offspring = learners[:] # No genetic operations 209 | 210 | # Attach fitness values to the offspring 211 | for ind in offspring: 212 | ind.fitness.values = toolbox.evaluate(ind) 213 | 214 | for i in range(population_size): 215 | offspring_fitness = 
# comparison/web/tlbo.py
#
# Multi-objective text readability optimization driven by a simplified
# Teaching-Learning-Based Optimization (TLBO) loop on top of DEAP.
# Candidate solutions are vectors of per-token "synonym slot" genes; the three
# objectives (all minimized) are: number of substituted words, ARI readability
# score of the rewritten text, and Word Mover's Distance to the original text.

import numpy as np
from readability import Readability
from nltk.corpus import wordnet
import random
import nltk
from deap import base, creator, tools
import gensim.downloader as api
import gensim.downloader
import requests
import time

start_time = time.time()  # wall-clock reference for the elapsed-time report

# NOTE(review): these two calls load the same ~1.6 GB model twice;
# `google_news_vectors` is never used in this file and could be dropped to
# halve start-up cost — kept for parity with the sibling comparison scripts.
model = api.load('word2vec-google-news-300')
google_news_vectors = gensim.downloader.load('word2vec-google-news-300')


def calculate_hypervolume(pareto_front, ref_point):
    """Staircase hypervolume of *pareto_front* with respect to *ref_point*.

    Only the first two coordinates of each point (and of *ref_point*) enter
    the sweep, even though the objective vectors carry three entries.

    NOTE(review): the sweep seeds prev_point = [0.0, ref_point[1]], so the
    result can be negative for fronts that do not dominate the reference
    point — preserved as-is to stay comparable with the sibling scripts.
    """
    sorted_front = sorted(pareto_front, key=lambda p: p[0])
    hypervolume = 0.0
    prev_point = [0.0, ref_point[1]]

    for point in sorted_front:
        if point[1] < prev_point[1]:
            hypervolume += (prev_point[0] - point[0]) * (prev_point[1] - ref_point[1])
            prev_point = point

    # Close the staircase against the reference point.
    hypervolume += (prev_point[0] - ref_point[0]) * (prev_point[1] - ref_point[1])
    return hypervolume


def listToString(s):
    """Join the tokens of *s* into one space-separated string.

    Also re-attaches commas that ended up as separate tokens (' ,' -> ',')
    and turns underscores into spaces (multi-word synonym terms).
    """
    str1 = ''.join(str(ele) + ' ' for ele in s)
    str1 = str1.replace(' ,', ',')
    str1 = str1.replace('_', ' ')
    return str1


def Synonym(word, number):
    """Return ``(index, replacement)`` for *word* from the cached Dict.

    Returns (-2, word) when no synonyms are cached; clamps *number* to the
    last available synonym when it exceeds the list length.
    """
    synonyms = []
    if Dict.get(word) is not None:
        synonyms = Dict.get(word)

    if not synonyms:
        return -2, word
    if number >= len(synonyms):
        # BUGFIX: Dict values in this web variant are plain strings scraped
        # from thesaurus.com ("term" fields), so the original
        # `synonyms[...][0]` returned the first *character* of the term —
        # an artifact of copy-pasting from the WordNet variant, where
        # entries are lemma lists.
        return len(synonyms) - 1, synonyms[-1]
    return int(number), synonyms[int(number - 1)]


def fitness_func1(solution):
    """ARI readability score of the text encoded by *solution*.

    Side effect: genes whose token has no usable synonyms
    (index_array[pos] <= 0) are zeroed in place, pinning those positions
    to "keep the original word".
    """
    print(solution)

    # Neutralize genes for tokens that must never be substituted.
    for pos, available in enumerate(index_array):
        if available <= 0:
            solution[pos] = 0

    text_converted = []
    for gene, token in zip(solution, text.split()):
        if gene < 1:
            text_converted.append(token)
        elif gene >= 1:
            _, word = Synonym(token, gene)
            text_converted.append(word)
        else:
            print('Error')  # unreachable unless gene is NaN

    result = listToString(text_converted)
    return Readability(result).ari().score


text = 'The sea moderates the climate and has important roles in the water cycle, carbon cycle, and nitrogen cycle. Humans harnessing and studying the sea have been recorded since ancient times, and evidenced well into prehistory, while its modern scientific study is called oceanography. The most abundant solid dissolved in seawater is sodium chloride. The water also contains salts of magnesium, calcium, potassium, and mercury, amongst many other elements, some in minute concentrations. Salinity varies widely, being lower near the surface and the mouths of large rivers and higher in the depths of the ocean; however, the relative proportions of dissolved salts vary little across the oceans.'

# Build the in-memory synonym dictionary by scraping thesaurus.com once per
# distinct candidate token (lower-case initial, longer than 3 characters).
resource = text.split()
Dict = {}
for i in resource:
    i = i.replace(',', '').replace('.', '')

    if i in Dict:
        print("Processing...Please wait")  # duplicate token, already fetched
    elif not i[0].isupper() and len(i) > 3:
        url = 'https://tuna.thesaurus.com/pageData/' + str(i)
        # NOTE(review): no timeout= argument — a stalled request hangs the
        # whole script; confirm whether a timeout is acceptable here.
        req = requests.get(url)
        try:
            dict_synonyms = req.json()['data']['definitionData']['definitions'][0]['synonyms']
        except TypeError:
            # pageData returns null for unknown words -> TypeError on subscript.
            print("Processing...Please wait")
            dict_synonyms = None

        if dict_synonyms is not None:
            synonyms = [r["term"] for r in dict_synonyms]
            if synonyms:
                Dict[i] = synonyms

# Probe every token once (synonym slot 6) to record, per position, how many
# synonyms are usable; 0 marks positions that must never be substituted.
text_array = []
index_array = []

for i in text.split():
    flag = 0
    if ',' in i:
        i = i.replace(',', '')
        flag = 1
    if '.' in i:
        i = i.replace('.', '')
        flag = 2

    if not i[0].isupper() and len(i) > 3:
        number, word = Synonym(i, 6)
        text_array.append(word)
        index_array.append(number)
    else:
        text_array.append(i)
        index_array.append(0)

    # Re-attach the punctuation stripped above to the token just added.
    if flag == 1:
        text_array[-1] += ','
    elif flag == 2:
        text_array[-1] += '.'


def obtain_text(solution):
    """Render the text encoded by *solution*, substitutions in UPPERCASE."""
    text_converted = []
    for gene, token in zip(solution, text.split()):
        if gene < 1:
            text_converted.append(token)
        elif gene >= 1:
            _, word = Synonym(token, gene)
            text_converted.append(word.upper())
        else:
            print('Error')  # unreachable unless gene is NaN

    return listToString(text_converted)


def objective_function(x):
    """Evaluate individual *x*; returns the three minimization objectives.

    Order matters: the target text is rendered BEFORE fitness_func1 zeroes
    ineligible genes of *x* in place.
    """
    source = text
    target = obtain_text(x)

    return [
        sum(1 for gene in x if gene >= 1),          # substitutions made
        fitness_func1(x),                           # ARI readability score
        float(model.wmdistance(source, target)),    # semantic drift (WMD)
    ]


# TLBO parameters
population_size = 20
max_generations = 50
dimension = len(index_array)   # one gene per token of the source text
lower_bound = -4
upper_bound = 4

# DEAP initialization: three objectives, all minimized.
creator.create("FitnessMin", base.Fitness, weights=(-1.0, -1.0, -1.0))
creator.create("Individual", list, fitness=creator.FitnessMin)

toolbox = base.Toolbox()
toolbox.register("attr_float", random.uniform, lower_bound, upper_bound)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, n=dimension)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("evaluate", objective_function)


def teaching_phase(learners, mean_teacher):
    """Pull every learner toward *mean_teacher*, mutating learners in place.

    Simplified TLBO teacher phase: x += r * (teacher_mean - x), i.e. without
    the usual teaching factor TF and class-mean term of canonical TLBO.
    """
    for learner in learners:
        diff = [mean_teacher[dim] - learner[dim] for dim in range(dimension)]
        step = [random.random() * diff[dim] for dim in range(dimension)]
        learner[:] = [learner[dim] + step[dim] for dim in range(dimension)]


# NOTE(review): mate/mutate are registered but never invoked in this variant.
toolbox.register("mate", tools.cxBlend, alpha=0.5)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.1)
toolbox.register("select", tools.selBest)


def main():
    """Run the TLBO loop; print elapsed time, hypervolume and front size."""
    population = toolbox.population(n=population_size)

    # Attach fitness values to the initial population.
    for ind in population:
        ind.fitness.values = toolbox.evaluate(ind)

    pareto_front = []  # accumulated candidate front across generations

    for generation in range(max_generations):
        # Teacher = mean vector of the 5 best individuals (selBest ranks
        # lexicographically over the weighted fitness tuple).
        teachers = toolbox.select(population, k=5)
        mean_teacher = [
            sum(teacher[dim] for teacher in teachers) / len(teachers)
            for dim in range(dimension)
        ]

        # NOTE(review): population[:] is a *shallow* copy — teaching_phase
        # mutates the same Individual objects held by `population`, so
        # offspring and population alias each other after this call.
        learners = population[:]
        teaching_phase(learners, mean_teacher)

        offspring = learners[:]  # no crossover/mutation in this variant

        # Attach fitness values to the offspring.
        for ind in offspring:
            ind.fitness.values = toolbox.evaluate(ind)

        for i in range(population_size):
            offspring_fitness = offspring[i].fitness.values
            for ind in population:
                if ind != offspring[i]:
                    ind_fitness = ind.fitness.values
                    # True when offspring[i] is <= ind on every objective,
                    # i.e. offspring[i] weakly dominates ind. (The original
                    # local was named "is_dominated", inverting its meaning;
                    # renamed only — the accept condition is unchanged.)
                    weakly_dominates = all(
                        offspring_fitness[dim] <= ind_fitness[dim]
                        for dim in range(len(offspring_fitness))
                    )
                    if not weakly_dominates and offspring[i] not in pareto_front:
                        pareto_front.append(offspring[i])

    # Deduplicate the collected front (list `in` compares by value).
    final = []
    front = []
    for item in pareto_front:
        if item not in front:
            front.append(item)

    end_time = time.time()                  # record the ending time
    elapsed_time = end_time - start_time    # calculate the elapsed time
    print(f"Elapsed time: {elapsed_time:.2f} seconds")

    # Collect fitness vectors of the deduplicated front.
    for solution in front:
        final.append(solution.fitness.values)

    ref_point = [30.0, 20.0, 1.0]
    hypervolume = calculate_hypervolume(final, ref_point)
    print("Hypervolume:", hypervolume)

    print(len(pareto_front))


if __name__ == "__main__":
    main()