├── catch ├── output │ └── .gitkeep ├── test │ ├── wordcloud.png │ ├── log.txt │ ├── annotation_grep.txt │ ├── annotation_wtags.txt │ ├── annotation_invertedgrep.txt │ ├── cyannotator.html │ └── social_media_posts.txt ├── params_catch.py ├── README.md └── catch.py ├── docs ├── _config.yml ├── workflow.png ├── paper.bib ├── paper.md └── README.md ├── bandersnatch ├── test │ ├── ontology_tags.txt │ ├── words_of_interest.txt │ ├── snatch_output.txt │ └── pocketmonsters.owl ├── README.md ├── params_snatch.py └── bandersnatch.py ├── flame └── README.md ├── bite ├── test │ ├── bite_output_plot.png │ ├── bite_output_stats.txt │ └── bite_output.tsv ├── README.md ├── params_bite.py └── bite.py ├── eyes ├── test │ └── pocketmonsters_web.png ├── README.md ├── params_eyes.py └── eyes.py ├── arise ├── README.md ├── test │ ├── new_annotations.tsv │ └── pocketmonsters_updated.owl ├── params_arise.py └── arise.py ├── Changelog.md ├── LICENSE ├── requirements.py ├── Contributing.md ├── README.md ├── .gitignore └── highlevel.py /catch/output/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-slate -------------------------------------------------------------------------------- /docs/workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sap218/jabberwocky/HEAD/docs/workflow.png -------------------------------------------------------------------------------- /bandersnatch/test/ontology_tags.txt: -------------------------------------------------------------------------------- 1 | oboInOWL:hasExactSynonym 2 | oboInOWL:hasRelatedSynonym 3 | -------------------------------------------------------------------------------- /catch/test/wordcloud.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sap218/jabberwocky/HEAD/catch/test/wordcloud.png -------------------------------------------------------------------------------- /flame/README.md: -------------------------------------------------------------------------------- 1 | # README - `flame` 2 | 3 | *Come back soon...* 4 | 5 | *** 6 | 7 | End of page 8 | -------------------------------------------------------------------------------- /bite/test/bite_output_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sap218/jabberwocky/HEAD/bite/test/bite_output_plot.png -------------------------------------------------------------------------------- /eyes/test/pocketmonsters_web.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sap218/jabberwocky/HEAD/eyes/test/pocketmonsters_web.png -------------------------------------------------------------------------------- /bite/test/bite_output_stats.txt: -------------------------------------------------------------------------------- 1 | time taken to run tf-idf: 0.001 2 | tf-idf raw df length: 137 3 | tf-idf adj. 
df length: 115 4 | -------------------------------------------------------------------------------- /bandersnatch/test/words_of_interest.txt: -------------------------------------------------------------------------------- 1 | generation one 2 | dragon 3 | route 4 | water 5 | small 6 | large 7 | generation six 8 | -------------------------------------------------------------------------------- /bandersnatch/test/snatch_output.txt: -------------------------------------------------------------------------------- 1 | generation one 2 | generation 1 3 | gen 1 4 | gen one 5 | dragon 6 | route 7 | water 8 | small 9 | large 10 | generation six 11 | generation 6 12 | gen 6 13 | gen six 14 | -------------------------------------------------------------------------------- /arise/README.md: -------------------------------------------------------------------------------- 1 | # README - `arise` 2 | 3 | ## `ontology_name` 4 | - ontology file (+path) 5 | 6 | ## `annotation_file` 7 | - file of annotations 8 | - can be either `.tsv` or `.csv` 9 | 10 | *** 11 | 12 | End of page 13 | -------------------------------------------------------------------------------- /catch/test/log.txt: -------------------------------------------------------------------------------- 1 | is this a test: True 2 | stopword filter level: light 3 | concepts count: 13 4 | post count: 26 5 | average word count: 12.076923076923077 6 | time taken to annotate (seconds): 0.09 7 | time taken to run script (seconds): 4.55 8 | -------------------------------------------------------------------------------- /arise/test/new_annotations.tsv: -------------------------------------------------------------------------------- 1 | annotation class tag 2 | path route oboInOWL:hasExactSynonym 3 | evolve generation oboInOWL:hasRelatedSynonym 4 | flew flying oboInOWL:hasExactSynonym 5 | mega large oboInOWL:hasRelatedSynonym 6 | breed type oboInOWL:hasRelatedSynonym 7 | air flying oboInOWL:hasRelatedSynonym 8 | https://pokemon.fandom.com/wiki/Types type oboInOWL:DbXref -------------------------------------------------------------------------------- /bandersnatch/README.md: -------------------------------------------------------------------------------- 1 | # README - `bandersnatch` 2 | 3 | ## `ontology_name` 4 | - ontology file (+path) 5 | 6 | ## `ontology_tags` 7 | - newline delimited file 8 | 9 | ## `classes_of_interest` 10 | - newline delimited file with ontology classes of interest 11 | - users can leave blank to use all classes 12 | 13 | *** 14 | 15 | End of page 16 | -------------------------------------------------------------------------------- /eyes/README.md: -------------------------------------------------------------------------------- 1 | # README - `eyes` 2 | 3 | ## `ontology` 4 | - ontology file (+path) 5 | 6 | ## `plot_type` 7 | - choose `web` or `tree` 8 | 9 | ## `*_colour` 10 | - colours for `superclass` (default orange) and `subclass` (default skyblue) 11 | 12 | ## `to_annotate_subclasses` 13 | - choose `True` or `False` to overlay text for subclasses in plots 14 | - recommended to use `False` if ontologies are large 15 | 16 | *** 17 | 18 | End of page 19 | -------------------------------------------------------------------------------- /catch/test/annotation_grep.txt: -------------------------------------------------------------------------------- 1 | Any small pokemon nearby? I need to catch a Metapod! 2 | I think only gen 6 pokemon are on this path - try route 2. 3 | I've checked that route twice already. 
I just want my Caterpie to evolve already. 4 | Anyone else dislike the new Pokedex? What happened to old fashioned gen 1? 5 | Anyone want a battle? I'm on route 13. 6 | I'll join but we should move to the meadow area near that route. 7 | Hey guys! Lake near route 7 I saw a Gyarados! 8 | What route is best for small normal pokemon? My Skitty needs a friend. 9 | Go to route 4, we're totally not on that path and we don't plan on catching your Skitty away from you. 10 | -------------------------------------------------------------------------------- /catch/test/annotation_wtags.txt: -------------------------------------------------------------------------------- 1 | ['small'] # Any small pokemon nearby? I need to catch a Metapod! 2 | ['gen 6', 'route'] # I think only gen 6 pokemon are on this path - try route 2. 3 | ['route'] # I've checked that route twice already. I just want my Caterpie to evolve already. 4 | ['gen 1'] # Anyone else dislike the new Pokedex? What happened to old fashioned gen 1? 5 | ['route'] # Anyone want a battle? I'm on route 13. 6 | ['route'] # I'll join but we should move to the meadow area near that route. 7 | ['route'] # Hey guys! Lake near route 7 I saw a Gyarados! 8 | ['route', 'small'] # What route is best for small normal pokemon? My Skitty needs a friend. 9 | ['route'] # Go to route 4, we're totally not on that path and we don't plan on catching your Skitty away from you. 10 | -------------------------------------------------------------------------------- /arise/params_arise.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @date: 2024 5 | @author: Samantha C Pendleton 6 | @GitHub: github.com/sap218/jabberwocky 7 | """ 8 | 9 | #################################################### 10 | # 11 | # PARAMETERS FOR ARISE 12 | # 13 | #################################################### 14 | 15 | is_this_a_test = True 16 | 17 | #################################################### 18 | 19 | if is_this_a_test: 20 | ontology_name = "../bandersnatch/test/pocketmonsters" 21 | annotation_file = "../arise/test/new_annotations" 22 | output_name = "../arise/test/%s" % ontology_name.split("/")[-1] 23 | 24 | else: 25 | ontology_name = "" 26 | annotation_file = "" 27 | 28 | output_name = "%s" % ontology_name.split("/")[-1] 29 | 30 | #################################################### 31 | 32 | # End of script 33 | -------------------------------------------------------------------------------- /bandersnatch/params_snatch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @date: 2024 5 | @author: Samantha C Pendleton 6 | @GitHub: github.com/sap218/jabberwocky 7 | """ 8 | 9 | #################################################### 10 | # 11 | # PARAMETERS FOR SNATCH 12 | # 13 | #################################################### 14 | 15 | is_this_a_test = True 16 | 17 | #################################################### 18 | 19 | if is_this_a_test: 20 | ontology_name = "test/pocketmonsters" 21 | ontology_tags = "test/ontology_tags" 22 | classes_of_interest = "test/words_of_interest" # if empty, extract all annotations of all classes 23 | output_name = "test/snatch_output" 24 | 25 | else: 26 | ontology_name = "" 27 | ontology_tags = "" 28 | classes_of_interest = "" 29 | 30 | output_name = "snatch_output" 31 | 32 | #################################################### 33 | 34 | # End of 
script 35 | -------------------------------------------------------------------------------- /bite/README.md: -------------------------------------------------------------------------------- 1 | # README - `bite` 2 | 3 | ## `corpus` 4 | - file with each post/sentence on a new line 5 | - can be `catch` output (grep) 6 | 7 | ## `concepts_to_remove` 8 | - concepts file with each on a new line to remove from TF-IDF statistical rankings 9 | - can be `snatch` output 10 | - users can leave blank to use all terms in corpus 11 | 12 | ## `filter_level` 13 | - parameter for which stop words list to use 14 | - "light" is a smaller list consisting only of 179 stop words 15 | - "heavy" is much larger list consisting of 1160 stop words 16 | 17 | ## `ngram_count` 18 | - a list of n-grams for TF-IDF 19 | - can modify for unigram only `[1]` or for bi-grams & tri-grams `[2,3]` 20 | 21 | ## `graph` 22 | - plot TF-IDF rankings 23 | 24 | ### `cm` 25 | - plotting colour for bars 26 | - recommended to use mediumseagreen, steelblue, or lightcoral 27 | 28 | ### `limit` 29 | - plot limit for top-N terms (default is 30) 30 | 31 | *** 32 | 33 | End of page 34 | -------------------------------------------------------------------------------- /catch/params_catch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @date: 2024 5 | @author: Samantha C Pendleton 6 | @GitHub: github.com/sap218/jabberwocky 7 | """ 8 | 9 | #################################################### 10 | # 11 | # PARAMETERS FOR CATCH 12 | # 13 | # Any completed fields below are recommendations 14 | # 15 | #################################################### 16 | 17 | is_this_a_test = True 18 | 19 | #################################################### 20 | 21 | file_corpus = "" 22 | file_words_of_interest = "" 23 | 24 | filter_level = "light" # or "none" or "heavy" 25 | output_format = "wtags" # ["wtags","grep","invertedgrep"] 26 | 27 | output_name = "test" 28 | 29 | plotWORDCLOUD = True 30 | if plotWORDCLOUD: 31 | colormapWC = "Set3" # default 32 | 33 | plotCYANNOTATOR = True 34 | if plotCYANNOTATOR: 35 | highlightcolour = "#00bcd4" # default = cyan 36 | 37 | #################################################### 38 | 39 | # End of script 40 | -------------------------------------------------------------------------------- /Changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | * **v3.3.0** [31/10/2025] 4 | - now includes word contractions 5 | * **v3.2.0** [08/05/2025] 6 | - cleaning up code, no longer required to use a list of stop words, log files included 7 | * **v3.1.1** [20/02/2025] 8 | - highlighting script: HTML output of annotated posts with highlighted key terms 9 | * **v3.1.0** [19/06/2024] 10 | - plotting script for ontologies 11 | - users can plot wordcloud without the need for annotations 12 | - tf-idf can now be done via n-grams 13 | * **v3.0.0** [12/06/2024] 14 | - major version change due to code alterations and repository has been redesigned 15 | - high-level script for functions & vars: text cleaning & stopwords 16 | - updated scripts for usability so users only need to edit a params file 17 | - plotting wordcloud 18 | * **v2.0.0** [10/05/2021] 19 | - includes `spacy PhraseMatcher()` 20 | - users can provide their own annotation tags 21 | - plotting tf-idf 22 | * **v1.0.0** [29/06/2020] 23 | - version presented in **JOSS** paper 24 | 25 | *** 26 | 27 | End of page 28 | 
-------------------------------------------------------------------------------- /catch/test/annotation_invertedgrep.txt: -------------------------------------------------------------------------------- 1 | No thanks, I'm, trying to catch a flying type in the mountatins with the clear air. 2 | I'll be there soon. Need to heal first. 3 | Currently on the opposite path, training for Brock's gym. 4 | I'll join too - it'll be nice for Ekans to get some practice - I'll be flying by air. 5 | Really? Still with an Ekans? Any chance it'll evolve soon?. 6 | Not yet. Taking is slow - Team Rocket are after one. 7 | So where are you guys meeting again? 8 | Remember to be careful everyone! If you see any grunts of Team Rocket please notify us via your pokedex. We are always alert on the path if you need assistance. 9 | Feel free to pop in the centre if you want to rest. 10 | I need one for my pokedex - luckily I'm only a few minutes away, I won't be too long, just coming down the path now. 11 | Hurry it'll mega-evolve! The other one flew away and I was too late. 12 | What is mega-evolve? 13 | What?! You've not heard of it?! 14 | Drat! I didn't know it took a while for Magikarps to evolve. 15 | Not sure. 16 | Do you mean cat-breed pokemon? 17 | The path near the school is always a good choice. 18 | -------------------------------------------------------------------------------- /catch/test/cyannotator.html: -------------------------------------------------------------------------------- 1 | small pokemon nearby catch metapod
think gen 6 pokemon path try route 2
check route twice already want caterpie evolve already
anyone else dislike new pokedex happen old fashioned gen 1
anyone want battle route 13
join move meadow area near route
hey guy lake near route 7 see gyarado
route good small normal pokemon skitty friend
go route 4 totally path plan catch skitty away -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020-present Samantha Pendleton | Jabberwocky 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /eyes/params_eyes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @date: 2024 5 | @author: Samantha C Pendleton 6 | @GitHub: github.com/sap218/jabberwocky 7 | """ 8 | 9 | #################################################### 10 | # 11 | # PARAMETERS FOR EYES 12 | # 13 | #################################################### 14 | 15 | is_this_a_test = True 16 | 17 | #################################################### 18 | 19 | if is_this_a_test: 20 | ontology = "../bandersnatch/test/pocketmonsters" 21 | #ontology = "test/other_ontologies/space" 22 | 23 | ontology_name = ontology.split("/")[-1] 24 | 25 | plot_type = "tree" 26 | plot_type = "web" 27 | 28 | superclass_colour = "orange" 29 | subclass_colour = "skyblue" 30 | 31 | to_annotate_subclasses = True # False 32 | 33 | output_name = "test/%s" % ontology_name 34 | 35 | else: 36 | ont = "" 37 | ontology_name = ontology.split("/")[-1] 38 | 39 | plot_type = "web" 40 | 41 | superclass_colour = "orange" 42 | subclass_colour = "skyblue" 43 | 44 | to_annotate_subclasses = False 45 | 46 | output_name = "%s" % ontology_name 47 | 48 | #################################################### 49 | 50 | # End of script 51 | -------------------------------------------------------------------------------- /bite/params_bite.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @date: 2024 5 | @author: Samantha C Pendleton 6 | @GitHub: github.com/sap218/jabberwocky 7 | """ 8 | 9 | #################################################### 10 | # 11 | # PARAMETERS FOR BITE 12 | # 13 | #################################################### 14 | 15 | is_this_a_test = True 16 | 17 | #################################################### 18 | 19 | if is_this_a_test: 20 | corpus = "../catch/test/catch_output_invert" 21 | concepts_to_remove = "../bandersnatch/test/snatch_output" 22 | filter_level = "light" # 
or "none" or "heavy" 23 | ngram_count = [1,3] 24 | graph = True 25 | cm = "mediumseagreen" 26 | limit = 30 # default is top 30 words 27 | output_name = "../bite/test/bite_output" 28 | stats_output_name = "../bite/test/bite_output_stats" 29 | plot_output_name = "../bite/test/bite_output_plot" 30 | 31 | else: 32 | corpus = "" 33 | concepts_to_remove = "" 34 | filter_level = "light" 35 | 36 | ngram_count = [1,3] 37 | 38 | graph = True 39 | cm = "mediumseagreen" 40 | limit = 30 41 | 42 | output_name = "bite_output" 43 | stats_output_name = "bite_output_stats" 44 | plot_output_name = "bite_output_plot" 45 | 46 | #################################################### 47 | 48 | # End of script 49 | -------------------------------------------------------------------------------- /catch/test/social_media_posts.txt: -------------------------------------------------------------------------------- 1 | Any small pokemon nearby? I need to catch a Metapod! 2 | I think only gen 6 pokemon are on this path - try route 2. 3 | I've checked that route twice already. I just want my Caterpie to evolve already. 4 | 5 | Anyone else dislike the new Pokedex? What happened to old fashioned gen 1? 6 | 7 | Anyone want a battle? I'm on route 13. 8 | No thanks, I'm, trying to catch a flying type in the mountatins with the clear air. 9 | I'll be there soon. Need to heal first. 10 | Currently on the opposite path, training for Brock's gym. 11 | I'll join too - it'll be nice for Ekans to get some practice - I'll be flying by air. 12 | Really? Still with an Ekans? Any chance it'll evolve soon?. 13 | Not yet. Taking is slow - Team Rocket are after one. 14 | I'll join but we should move to the meadow area near that route. 15 | So where are you guys meeting again? 16 | 17 | Remember to be careful everyone! If you see any grunts of Team Rocket please notify us via your pokedex. We are always alert on the path if you need assistance. 18 | Feel free to pop in the centre if you want to rest. 19 | 20 | Hey guys! Lake near route 7 I saw a Gyarados! 21 | I need one for my pokedex - luckily I'm only a few minutes away, I won't be too long, just coming down the path now. 22 | Hurry it'll mega-evolve! The other one flew away and I was too late. 23 | What is mega-evolve? 24 | What?! You've not heard of it?! 25 | Drat! I didn't know it took a while for Magikarps to evolve. 26 | 27 | What route is best for small normal pokemon? My Skitty needs a friend. 28 | Not sure. 29 | Do you mean cat-breed pokemon? 30 | The path near the school is always a good choice. 31 | Go to route 4, we're totally not on that path and we don't plan on catching your Skitty away from you. 
32 | -------------------------------------------------------------------------------- /catch/README.md: -------------------------------------------------------------------------------- 1 | # README - `catch` 2 | 3 | ## `test/` & `output/` 4 | - directories for results 5 | 6 | *** 7 | 8 | ## `is_this_a_test` 9 | - set to `True` to run the test, see `test/` for the results 10 | 11 | *** 12 | 13 | ## `file_corpus` 14 | - a `.txt` file with each post/sentence on a new line 15 | 16 | ## `file_words_of_interest` 17 | - a `.txt` file with concepts/words of interest separated by a new line 18 | - can be `snatch` output 19 | - script will run if empty so users can use other features - please ensure you check outputs 20 | 21 | ## `filter_level` 22 | - parameter for which list of stop words to use 23 | - `light` is a small list consisting of 179 stop words 24 | - `heavy` is much larger consisting of 1160 stop words 25 | - `none` to not remove stop words 26 | 27 | ## `output_format` 28 | - `wtags` = each annotated post **with** the terms that were annotated 29 | - `grep` = output in grep format (simply the annotated posts only) 30 | - `invertedgrep` = posts that were NOT annotated 31 | 32 | ## `output_name` 33 | - users should edit the output name, these will be stored in `output/` 34 | - all outputs are timestamped to avoid overwriting files 35 | 36 | ## `plotWORDCLOUD` 37 | - set `True` to plot a wordcloud of `file_corpus` 38 | - if you intend to plot, it is recommended to use a filter level for stop words 39 | 40 | ### `colormapWC` 41 | - colour scheme for the wordcloud - users can provide [any palette](https://matplotlib.org/stable/users/explain/colors/colormaps.html) 42 | - default is `Set3` (pastel) but a nice recommendation is `viridis` (purple -> green) 43 | 44 | ## `plotCYANNOTATOR` 45 | - set `True` to output an `HTML` of annotated posts with the highlighted concepts 46 | 47 | ### `highlightcolour` 48 | - colour of highlighting - default is cyan (`#00bcd4`) 49 | 50 | *** 51 | 52 | End of page 53 | -------------------------------------------------------------------------------- /requirements.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @date: 2024 5 | @author: Samantha C Pendleton 6 | @description: versions 7 | @GitHub: github.com/sap218/jabberwocky 8 | """ 9 | 10 | # Modules used in Jabberwocky 11 | 12 | import sys # this includes: import time 13 | print("Python \t v%s" % sys.version.split(" ")[0]) 14 | 15 | import bs4 16 | print("BeautifulSoup4 \t v%s" % bs4.__version__) 17 | 18 | ''' 19 | # Base modules 20 | 21 | import re 22 | print("re \t v%s" % re.__version__) 23 | 24 | import json 25 | print("json \t v%s" % json.__version__) 26 | ''' 27 | 28 | import contractions 29 | import pkg_resources 30 | version = pkg_resources.get_distribution("contractions").version 31 | print("contractions \t v%s" % version) 32 | del version 33 | 34 | import pandas as pd 35 | print("pandas \t v%s" % pd.__version__) 36 | 37 | import matplotlib 38 | print("matplotlib \t v%s" % matplotlib.__version__) 39 | 40 | import sklearn 41 | print("scikit-learn \t v%s" % sklearn.__version__) 42 | 43 | import spacy 44 | print("spaCy \t v%s" % spacy.__version__) 45 | 46 | import wordcloud 47 | print("wordcloud \t v%s" % wordcloud.__version__) 48 | 49 | import nltk 50 | print("nltk \t v%s" % nltk.__version__) 51 | 52 | import networkx 53 | print("networkx \t v%s" % networkx.__version__) 54 | 55 | print("additional 
information: \t %s" % "".join(sys.version.split("|")[1:])) 56 | 57 | # When running Jabberwocky, users need these versions minimum 58 | 59 | ''' 60 | Python v3.12.3 61 | BeautifulSoup4 v4.12.3 62 | contractions v0.1.73 63 | pandas v2.2.2 64 | matplotlib v3.9.2 65 | scikit-learn v1.5.1 66 | spaCy v3.7.2 67 | wordcloud v1.9.4 68 | nltk v3.9.1 69 | networkx v3.3 70 | additional information: packaged by conda-forge (main, Apr 15 2024, 18:20:11) [MSC v.1938 64 bit (AMD64)] 71 | ''' 72 | 73 | #################################################### 74 | 75 | # End of script 76 | -------------------------------------------------------------------------------- /Contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines / Issues for Jabberwocky :dragon_face: 2 | 3 | * Users are welcome to contribute to this project via pull requests or bug reporting. 4 | * In either circumstance, please ensure titles/descriptions have as much information as possible, e.g. if creating a Bug/Issue, try and trace your steps w/ details & error messages. 5 | * The primary maintainer(s) - currently [@sap218](https://github.com/sap218) - will address the request! 6 | * Maintainers will always try their best to meet the needs of the user but also considering what is best for **Jabberwocky**. 7 | 8 | ## Contributing Code 9 | * Users intending to contribute to this repository can open a **Pull request**. 10 | * Frequent contributors will be added to a contributors list for thanks and acknowledgement. 11 | * **Note**: please provide information (e.g. decisions) and plenty of comments (w/ username to acknowledge contribution), e.g. 12 | 13 | ``` 14 | print(f"Error message: {errmsg}") # print error message (example comment) for reference - @yourusername 15 | ``` 16 | 17 | ## Issues 18 | * Users are encouraged to create an [`Issue`](https://github.com/sap218/jabberwocky/issues). 19 | * Issues can relate to anything: bug/error reporting, feature requests, help questions, to improve documentation, etc. 
20 | * Issues will be labelled accordingly - see below for [`label`](https://github.com/sap218/jabberwocky/labels) information: 21 | 22 | #### bug 23 | * if any errors arrise 24 | 25 | #### documentation 26 | * if the guides need more information 27 | 28 | #### duplicate 29 | * if the ticket already exists 30 | 31 | #### help 32 | * if the user is asking for help (not relating to other tags) 33 | 34 | #### request 35 | * if a new feature is being requested 36 | 37 | #### wontfix 38 | * there may be circumstances that an Issue *won't* or *shouldn't* be fixed 39 | * for example, some behaviours may be intentional, or a fix isn't in scope 40 | * the maintainer will help as much as possible and comment why this label is applied, giving users time to respond 41 | 42 | *** 43 | 44 | End of page 45 | -------------------------------------------------------------------------------- /arise/arise.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @date: 2024 5 | @author: Samantha C Pendleton 6 | @description: annotate ontology classes 7 | @GitHub: github.com/sap218/jabberwocky 8 | 9 | @useful links: 10 | # https://www.crummy.com/software/BeautifulSoup/bs4/doc/#insert 11 | """ 12 | 13 | from bs4 import BeautifulSoup 14 | import pandas as pd 15 | 16 | from params_arise import * 17 | 18 | #################################################### 19 | 20 | with open("%s.owl" % ontology_name, "rt") as o: 21 | ontology_file = o.read() 22 | ontology_soup = BeautifulSoup(ontology_file,'xml') # BEAUTIFUL SOUP really is beautiful 23 | del o, ontology_file 24 | 25 | #################################################### 26 | 27 | try: 28 | annotations = pd.read_csv('%s.tsv' % annotation_file, sep='\t', header=0) 29 | except: 30 | annotations = pd.read_csv('%s.csv' % annotation_file, header=0) 31 | 32 | #################################################### 33 | 34 | finding = ontology_soup.find_all('owl:Class') # finding all owl classes 35 | for concept in finding: 36 | label = concept.find("rdfs:label").get_text()#.lower() # getting labels 37 | 38 | for term_iteration in range(len(annotations)): # going through each row on the tf-idf dataframe 39 | 40 | class_match_label = list(annotations['class'])[term_iteration] 41 | class_new_annotations = list(annotations['annotation'])[term_iteration] 42 | new_annotation_tag = list(annotations['tag'])[term_iteration] 43 | 44 | if label == class_match_label: 45 | tag = ontology_soup.new_tag(new_annotation_tag) 46 | tag.string = class_new_annotations 47 | concept.insert(1, tag) # insert after line one (line one is declaring the ontology concept) 48 | 49 | #################################################### 50 | 51 | updated_ont = str(ontology_soup).replace('', '') # replacing first line - very important 52 | 53 | #################################################### 54 | 55 | with open("%s_updated.owl" % output_name, "w") as file: # exporting # encoding="utf-8" 56 | file.write(updated_ont) 57 | 58 | #################################################### 59 | 60 | # End of script 61 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Jabberwocky 2 | 3 | [![DOI](https://joss.theoj.org/papers/10.21105/joss.02168/status.svg)](https://doi.org/10.21105/joss.02168) 4 | 5 | Jabberwocky is a toolkit for NLP and **ontologies**. 
Since we all know ontologies are *nonsense*. 6 | 7 | ## Functionality 8 | 9 | Read the [documentation](https://sap218.github.io/jabberwocky/) for more detail. 10 | 11 | script | description 12 | ------- | ----------- 13 | `bandersnatch` | extract metadata from ontology classes 14 | `catch` | annotate corpus with key terms & generate wordcloud 15 | `bite` | rank terms in order of importance & bar plot 16 | `arise` | update ontology with new metadata 17 | `eyes` | plot an ontology via web or tree format 18 | 19 | When combining these Jabberwocky functions, users can create an NLP workflow. 20 | 21 | ![workflow](/docs/workflow.png) 22 | 23 | ## Running 24 | Within each directory, there is a file `params_*.py` which users can edit. 25 | Meaning users shouldn't need to edit the main/primary script. 26 | 27 | Check the individual directory `READMEs` for parameter information. 28 | 29 | #### Prerequisites 30 | Check [`requirements.py`](https://github.com/sap218/jabberwocky/blob/master/requirements.py) for a list of packages and versions. 31 | 32 | ## Changelog / Version 33 | See the [**Changelog**](https://github.com/sap218/jabberwocky/blob/master/Changelog.md) (ordered by newest first). 34 | 35 | ## Contributing / Issues 36 | Please read the [**Contributing Guidelines**](https://github.com/sap218/jabberwocky/blob/master/Contributing.md). 37 | 38 | - [@majensen](https://github.com/majensen) set up automated testing w/ `pytest` in v1.0 - see [pull request #13](https://github.com/sap218/jabberwocky/pull/13) for more details 39 | 40 | ## License 41 | The [license](https://github.com/sap218/jabberwocky/blob/master/LICENSE) is **MIT** and so users only need to cite (below) if using. 42 | 43 | ## Citing 44 | 45 | ``` 46 | @article{Pendleton2020, 47 | doi = {10.21105/joss.02168}, 48 | url = {https://doi.org/10.21105/joss.02168}, 49 | year = {2020}, 50 | publisher = {The Open Journal}, 51 | volume = {5}, 52 | number = {51}, 53 | pages = {2168}, 54 | author = {Samantha C. Pendleton and Georgios V. Gkoutos}, 55 | title = {Jabberwocky: an ontology-aware toolkit for manipulating text}, 56 | journal = {Journal of Open Source Software} 57 | } 58 | ``` 59 | 60 | The poem, Jabberwocky, written by Lewis Carrol, is described as a "nonsense" poem :dragon: 61 | 62 | *** 63 | 64 | End of page 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # gitignore 2 | # Files not needed to be uploaded 3 | 4 | eyes/test/other_ontologies/ 5 | 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | pip-wheel-metadata/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | *.py,cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 101 | __pypackages__/ 102 | 103 | # Celery stuff 104 | celerybeat-schedule 105 | celerybeat.pid 106 | 107 | # SageMath parsed files 108 | *.sage.py 109 | 110 | # Environments 111 | .env 112 | .venv 113 | env/ 114 | venv/ 115 | ENV/ 116 | env.bak/ 117 | venv.bak/ 118 | 119 | # Spyder project settings 120 | .spyderproject 121 | .spyproject 122 | 123 | # Rope project settings 124 | .ropeproject 125 | 126 | # mkdocs documentation 127 | /site 128 | 129 | # mypy 130 | .mypy_cache/ 131 | .dmypy.json 132 | dmypy.json 133 | 134 | # Pyre type checker 135 | .pyre/ 136 | -------------------------------------------------------------------------------- /bite/test/bite_output.tsv: -------------------------------------------------------------------------------- 1 | Word Raw score Normalised score 2 | mega 0.071 1.0 3 | hear 0.056 0.74 4 | sure 0.056 0.74 5 | path 0.044 0.542 6 | one 0.043 0.536 7 | take 0.04 0.475 8 | guy 0.039 0.469 9 | meet 0.039 0.469 10 | fly 0.038 0.44 11 | soon 0.037 0.43 12 | need 0.037 0.424 13 | ekan 0.031 0.333 14 | away 0.027 0.268 15 | air 0.026 0.251 16 | always 0.026 0.243 17 | take_slow_one 0.024 0.213 18 | yet 0.024 0.213 19 | slow 0.024 0.213 20 | yet_take_slow 0.024 0.213 21 | breed 0.023 0.192 22 | cat 0.023 0.192 23 | mean 0.023 0.192 24 | mean_cat_breed 0.023 0.192 25 | cat_breed_pokemon 0.023 0.192 26 | pokemon 0.023 0.192 27 | i_soon_need 0.022 0.18 28 | soon_need_heal 0.022 0.18 29 | first 0.022 0.18 30 | heal 0.022 0.18 31 | need_heal_first 0.022 0.18 32 | pokedex 0.021 0.172 33 | know_take_magikarp 0.021 0.17 34 | know 0.021 0.17 35 | drat_i_know 0.021 0.17 36 | drat 0.021 0.17 37 | i_know_take 0.021 0.17 38 | magikarp 0.021 0.17 39 | really_still_ekan 0.02 0.151 40 | chance 0.02 0.151 41 | really 0.02 0.151 42 | still 0.02 0.151 43 | still_ekan_chance 0.02 0.151 44 | ekan_chance_soon 0.02 0.151 45 | school_always_good 0.018 0.118 46 | choice 0.018 0.118 47 | near 0.018 0.118 48 | near_school_always 0.018 0.118 49 | good 0.018 0.118 50 | path_near_school 0.018 0.118 51 | always_good_choice 0.018 0.118 52 | school 0.018 0.118 53 | currently 0.018 0.114 54 | currently_opposite_path 0.018 0.114 55 | training_brock_gym 0.018 0.114 56 | brock 0.018 0.114 57 | path_training_brock 0.018 0.114 58 | gym 0.018 0.114 59 | opposite 0.018 
0.114 60 | opposite_path_training 0.018 0.114 61 | training 0.018 0.114 62 | hurry_mega_one 0.018 0.11 63 | hurry 0.018 0.11 64 | one_fly_away 0.018 0.11 65 | fly_away_i 0.018 0.11 66 | late 0.018 0.11 67 | away_i_late 0.018 0.11 68 | mega_one_fly 0.018 0.11 69 | rest 0.018 0.107 70 | pop_centre_want 0.018 0.107 71 | pop 0.018 0.107 72 | want 0.018 0.107 73 | centre 0.018 0.107 74 | centre_want_rest 0.018 0.107 75 | free_pop_centre 0.018 0.107 76 | feel 0.018 0.107 77 | feel_free_pop 0.018 0.107 78 | free 0.018 0.107 79 | ekan_get_practice 0.015 0.069 80 | nice 0.015 0.069 81 | get 0.015 0.069 82 | i_fly_air 0.015 0.069 83 | i_join_nice 0.015 0.069 84 | nice_ekan_get 0.015 0.069 85 | get_practice_i 0.015 0.069 86 | join 0.015 0.069 87 | join_nice_ekan 0.015 0.069 88 | practice 0.015 0.069 89 | practice_i_fly 0.015 0.069 90 | type 0.015 0.058 91 | type_mountatin_clear 0.015 0.058 92 | try_catch_fly 0.015 0.058 93 | thank 0.015 0.058 94 | thank_i_try 0.015 0.058 95 | try 0.015 0.058 96 | mountatin_clear_air 0.015 0.058 97 | clear 0.015 0.058 98 | catch_fly_type 0.015 0.058 99 | fly_type_mountatin 0.015 0.058 100 | catch 0.015 0.058 101 | mountatin 0.015 0.058 102 | i_try_catch 0.015 0.058 103 | away_i_long 0.013 0.036 104 | luckily 0.013 0.036 105 | come 0.013 0.036 106 | i_long_come 0.013 0.036 107 | i_minute_away 0.013 0.036 108 | i_need_one 0.013 0.036 109 | long 0.013 0.036 110 | one_pokedex_luckily 0.013 0.036 111 | long_come_path 0.013 0.036 112 | luckily_i_minute 0.013 0.036 113 | pokedex_luckily_i 0.013 0.036 114 | minute 0.013 0.036 115 | minute_away_i 0.013 0.036 116 | need_one_pokedex 0.013 0.036 117 | -------------------------------------------------------------------------------- /bandersnatch/bandersnatch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @date: 2024 5 | @author: Samantha C Pendleton 6 | @description: curate classes (& synonyms) from an ontology 7 | @GitHub: github.com/sap218/jabberwocky 8 | 9 | @useful links: 10 | # https://stackoverflow.com/questions/35898699/why-is-beautifulsoup-altering-the-format-of-my-xml 11 | """ 12 | 13 | import sys 14 | from bs4 import BeautifulSoup 15 | 16 | from params_snatch import * 17 | 18 | #################################################### 19 | 20 | with open("%s.owl" % ontology_name, "rt") as o: 21 | ontology_file = o.read() 22 | ontology_soup = BeautifulSoup(ontology_file,'xml') # BEAUTIFUL SOUP really is beautiful 23 | del o, ontology_file 24 | 25 | annotation_tags = [] 26 | with open("%s.txt" % ontology_tags, "r") as t: 27 | for tag in t: 28 | annotation_tags.append(tag.strip("\n")) 29 | del tag, t 30 | 31 | #################################################### 32 | 33 | find_all_concepts = ontology_soup.find_all('owl:Class') # this finds all concepts in the ontology 34 | classes_and_annotations = {} 35 | for concept in find_all_concepts: 36 | label = concept.find("rdfs:label").get_text() # gets label for concept 37 | list_annotations = [] 38 | for tag_format in annotation_tags: 39 | finding_tags = concept.find_all(tag_format) # a concept could have multiple "exact synonyms" 40 | flatten = [x.get_text() for x in finding_tags] 41 | list_annotations.extend(flatten) 42 | classes_and_annotations[label] = list_annotations 43 | del find_all_concepts, flatten, label, list_annotations, finding_tags, tag_format, annotation_tags 44 | 45 | #################################################### 46 | 47 | if len(classes_of_interest) > 0: 
48 | try: 49 | words_of_interest = [] 50 | with open("%s.txt" % classes_of_interest, "r") as t: 51 | for word in t: 52 | words_of_interest.append(word.strip("\n").strip(" ")) # words of interest 53 | print("User has provided a list of ontology classes of interest - success") 54 | del t, word 55 | 56 | except FileNotFoundError: 57 | sys.exit("User attempted to provide a list with ontology classes of interest - unsuccessful") 58 | 59 | else: 60 | words_of_interest = None 61 | print("User not providing a list of ontology classes of interest - using all classes for annotations") 62 | 63 | #################################################### 64 | 65 | if words_of_interest: 66 | search_concepts = {key: classes_and_annotations[key] for key in words_of_interest} 67 | else: 68 | search_concepts = classes_and_annotations.copy() 69 | 70 | #################################################### 71 | 72 | #with open('test/snatch_output.json', 'w') as j: 73 | # json.dump(search_concepts, j, indent=4) 74 | #del j 75 | 76 | #################################################### 77 | 78 | search_concepts = [key_val for key, value in search_concepts.items() for key_val in [key] + value] 79 | 80 | with open('%s.txt' % output_name, 'w') as t: 81 | for word in search_concepts: 82 | t.write(word + '\n') 83 | del t, word 84 | 85 | #################################################### 86 | 87 | # End of script 88 | -------------------------------------------------------------------------------- /eyes/eyes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @date: 2024 5 | @author: Samantha C Pendleton 6 | @description: plot an ontology 7 | @GitHub: github.com/sap218/jabberwocky 8 | 9 | @useful links: 10 | # https://stackoverflow.com/a/21990980 11 | """ 12 | 13 | from bs4 import BeautifulSoup 14 | import networkx as nx 15 | import matplotlib.pyplot as plt 16 | from textwrap import wrap 17 | 18 | from params_eyes import * 19 | 20 | #################################################### 21 | 22 | with open("%s.owl" % ontology, "rt") as o: 23 | ontology_file = o.read() 24 | ontology_soup = BeautifulSoup(ontology_file,'xml') # BEAUTIFUL SOUP really is beautiful 25 | del o, ontology 26 | 27 | #################################################### 28 | 29 | G = nx.DiGraph() # graph 30 | 31 | #################################################### 32 | 33 | finding = ontology_soup.find_all('owl:Class') # finding all owl classes 34 | concepts = [] 35 | 36 | for concept in finding: 37 | label = concept.find("rdfs:label").get_text() 38 | concepts.append(label) 39 | iri = concept.get('rdf:about') 40 | 41 | G.add_node(label, id=iri) # node for each class 42 | 43 | # find superclass and add edges 44 | subclasses = concept.find_all("rdfs:subClassOf") 45 | for subclass in subclasses: 46 | superclass = subclass.get('rdf:resource') 47 | # now get label of superclass... 
48 | subclass_label = ontology_soup.find(attrs={"rdf:about": superclass}).find("rdfs:label").get_text() 49 | G.add_edge(subclass_label, label) # add edge for relationship 50 | 51 | del finding, iri, label, subclass_label, subclass, superclass, subclasses 52 | 53 | #################################################### 54 | 55 | # G.remove_node("Space Ontology (UFO)") 56 | 57 | high_level_classes = [node for node, degree in G.in_degree() if degree == 0] 58 | color_map = [superclass_colour if node in high_level_classes else subclass_colour for node in G.nodes()] 59 | 60 | #################################################### 61 | 62 | plt.figure(figsize=(18, 10)) 63 | 64 | if plot_type == "tree": 65 | pos = nx.nx_agraph.graphviz_layout(G, prog='dot') 66 | elif plot_type == "web": 67 | pos = nx.nx_agraph.graphviz_layout(G, prog='sfdp') 68 | 69 | #################################################### 70 | 71 | node_degrees = dict(G.degree()) 72 | node_sizes = [15 * node_degrees[node] for node in G.nodes()] 73 | 74 | if plot_type == "web": 75 | min_lim = int( sorted(node_sizes,reverse=True)[:11][-1] ) 76 | node_sizes = [15 if n <= min_lim else n for n in node_sizes] 77 | 78 | #################################################### 79 | 80 | nx.draw_networkx_nodes(G, pos, 81 | node_size=node_sizes, 82 | node_color=color_map, 83 | alpha=0.8) 84 | 85 | nx.draw_networkx_edges(G, pos, edge_color="gray", alpha=0.5, width=1.0, arrows=True) 86 | 87 | #################################################### 88 | 89 | if to_annotate_subclasses: 90 | highlevelfontsize = 8 91 | lowlevelfontsize = 6 92 | else: highlevelfontsize = 14 93 | 94 | #################################################### 95 | 96 | labels = {node: '\n'.join(wrap(node, width=11)) if node in high_level_classes else node for node in G.nodes() if node in high_level_classes} 97 | nx.draw_networkx_labels(G, pos, font_size=highlevelfontsize, font_weight="bold",labels=labels) 98 | 99 | labels = {node: '\n'.join(wrap(node, width=15)) if node not in high_level_classes else node for node in G.nodes() if node not in high_level_classes} 100 | if to_annotate_subclasses: nx.draw_networkx_labels(G, pos, font_size=lowlevelfontsize, labels=labels) 101 | 102 | #################################################### 103 | 104 | #plt.title("Ontology") 105 | plt.axis('off') 106 | plt.savefig("%s_%s.png" % (output_name, plot_type), format="PNG", dpi=300, bbox_inches='tight') 107 | plt.show() 108 | 109 | #################################################### 110 | 111 | # End of script 112 | -------------------------------------------------------------------------------- /docs/paper.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cejuela2014-lv, 2 | title = "tagtog: interactive and text-mining-assisted annotation of gene 3 | mentions in {PLOS} full-text articles", 4 | author = "Cejuela, Juan Miguel and McQuilton, Peter and Ponting, Laura and 5 | Marygold, Steven J and Stefancsik, Raymund and Millburn, Gillian 6 | H and Rost, Burkhard and {FlyBase Consortium}", 7 | journal = "Database", 8 | volume = 2014, 9 | number = 0, 10 | month = apr, 11 | year = 2014, 12 | language = "en", 13 | doi = {10.1093/database/bau033} 14 | } 15 | 16 | @MISC{Honnibal2017-dn, 17 | title = "{s}pa{C}y 2: Natural language understanding with {B}loom embeddings, 18 | convolutional neural networks and incremental parsing", 19 | author = "Honnibal, Matthew and Montani, Ines", 20 | url = {https://github.com/explosion/spaCy}, 21 | year = 2017 22 | } 23 | 24 
| @INPROCEEDINGS{Manning2014-rt, 25 | title = "The {S}tanford {CoreNLP} natural language processing toolkit", 26 | booktitle = "{P}roceedings of 52nd {A}nnual {M}eeting of the {A}ssociation for 27 | {C}omputational {L}inguistics: {S}ystem {D}emonstrations", 28 | author = "Manning, Christopher and Surdeanu, Mihai and Bauer, John and 29 | Finkel, Jenny and Bethard, Steven and McClosky, David", 30 | pages = "55--60", 31 | year = 2014, 32 | doi = {10.3115/v1/p14-5010} 33 | } 34 | 35 | @ARTICLE{Schriml2012-qp, 36 | title = "Disease Ontology: a backbone for disease semantic integration", 37 | author = "Schriml, Lynn Marie and Arze, Cesar and Nadendla, Suvarna and 38 | Chang, Yu-Wei Wayne and Mazaitis, Mark and Felix, Victor and 39 | Feng, Gang and Kibbe, Warren Alden", 40 | journal = "Nucleic Acids Res.", 41 | volume = 40, 42 | number = "Database issue", 43 | month = jan, 44 | year = 2012, 45 | language = "en", 46 | doi = {10.1093/nar/gkr972} 47 | } 48 | 49 | @ARTICLE{Robinson2008-jh, 50 | title = "The Human Phenotype Ontology: a tool for annotating and analyzing 51 | human hereditary disease", 52 | author = "Robinson, Peter N and K{\"o}hler, Sebastian and Bauer, Sebastian 53 | and Seelow, Dominik and Horn, Denise and Mundlos, Stefan", 54 | journal = "Am. J. Hum. Genet.", 55 | volume = 83, 56 | number = 5, 57 | pages = "610--615", 58 | month = nov, 59 | year = 2008, 60 | language = "en", 61 | doi = {10.1016/j.ajhg.2008.09.017} 62 | } 63 | 64 | @ARTICLE{Hoehndorf2015-qr, 65 | title = "The role of ontologies in biological and biomedical research: a 66 | functional perspective", 67 | author = "Hoehndorf, Robert and Schofield, Paul N and Gkoutos, Georgios V", 68 | journal = "Brief. Bioinform.", 69 | volume = 16, 70 | number = 6, 71 | pages = "1069--1080", 72 | month = nov, 73 | year = 2015, 74 | keywords = "Semantic Web; data integration; data mining; ontology", 75 | language = "en", 76 | doi = {10.1093/bib/bbv011} 77 | } 78 | 79 | 80 | 81 | @ARTICLE{Van_Rossum1995-ia, 82 | title = "Python tutorial, technical report {CS-R9526}", 83 | author = "van Rossum, G", 84 | journal = "Centrum voor Wiskunde en Informatica (CWI), Amsterdam", 85 | year = 1995 86 | } 87 | 88 | @ARTICLE{Richardson2007-ba, 89 | title = "Beautiful soup documentation", 90 | author = "Richardson, Leonard", 91 | journal = "April", 92 | publisher = "media.readthedocs.org", 93 | year = 2007, 94 | url = {https://beautiful-soup-4.readthedocs.io/en/latest/} 95 | } 96 | 97 | @ARTICLE{Pedregosa2011-st, 98 | title = "Scikit-learn: Machine Learning in {P}ython", 99 | author = "Pedregosa, Fabian and Varoquaux, Ga{\"e}l and Gramfort, Alexandre 100 | and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and 101 | Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and 102 | Dubourg, Vincent and Vanderplas, Jake and Passos, Alexandre and 103 | Cournapeau, David and Brucher, Matthieu and Perrot, Matthieu and 104 | Duchesnay, {\'E}douard", 105 | journal = "J. Mach. Learn. 
Res.", 106 | volume = 12, 107 | number = "Oct", 108 | pages = "2825--2830", 109 | year = 2011 110 | } 111 | 112 | @INPROCEEDINGS{McKinney2010-xf, 113 | title = "Data structures for statistical computing in {P}ython", 114 | booktitle = "Proceedings of the 9th {P}ython in {S}cience {C}onference", 115 | author = "McKinney, Wes and {Others}", 116 | volume = 445, 117 | pages = "51--56", 118 | institution = "Austin, TX", 119 | year = 2010, 120 | doi = {10.25080/majora-92bf1922-00a} 121 | } 122 | -------------------------------------------------------------------------------- /docs/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Jabberwocky: an ontology-aware toolkit for manipulating text' 3 | tags: 4 | - Python 5 | - Ontologies 6 | - Text 7 | authors: 8 | - name: Samantha C Pendleton 9 | orcid: 0000-0002-6169-0135 10 | affiliation: "1, 2" # (Multiple affiliations must be quoted) 11 | - name: Georgios V Gkoutos 12 | affiliation: "1, 2" # (Multiple affiliations must be quoted) 13 | affiliations: 14 | - name: Institute of Cancer and Genomic Sciences, University of Birmingham, UK 15 | index: 1 16 | - name: University Hospitals Birmingham NHS Foundation Trust, UK 17 | index: 2 18 | date: 25 February 2020 19 | bibliography: paper.bib 20 | 21 | --- 22 | 23 | # Summary 24 | 25 | Unstructured textual data is underused, as extracting the key textual elements is complicated by a lack of structured terms, e.g., collecting the sentences from a corpus that are discussing a particular topic. To extract valuable text about a topic from a corpus, a user will need to gather a set of related terms. For example, when analysing clinical documents we can extract sentences by using specific clinicial terms. However this can miss additional valuable sentences where synonyms are used instead (e.g., physician notes that use shorthand). By considering terms and their synonyms we can extract more sentences from a corpus, making more data available for analysis. One way to do this and represent our knowledge of terms associated with a domain is to create an ontology. Ontologies allow us to formalise our knowledge of a domain in a condensed manner by using controlled terms, called classes [@Hoehndorf2015-qr]. Classes can be annotated with metadata, including synonyms. Ontologies can include relationships between terms, and annotations such as cross-references to other ontologies [@Hoehndorf2015-qr]. 26 | 27 | Clearly, ontologies are valuable for the analysis of textual data. Unfortunately, despite the existence of many well-established ontologies, such as the "Human Phenotype Ontology" [@Robinson2008-jh] and the "Disease Ontology" [@Schriml2012-qp], there remains a lack of tools that can take advantage of ontologies, especially for general text manipulation. Existing tools for annotating text, such as “spaCy” [@Honnibal2017-dn], “tagtog” [@Cejuela2014-lv], and “Stanford CoreNLP” [@Manning2014-rt] cannot interrogate text with an ontology directly, and require ontologies to be pre-processed into other formats (leaving the time-consuming task of extracting labels and tags from an ontology into a suitable intermediate format as an exercise for the end-user). These are specialist tools, returning all text in the document with every word tagged, as “noun”, “verb”, and other customised tags. There exists a niche for users who want to leverage an ontology to retrieve textual data from a corpus without having to perform any pre-processing, or parse away unwanted tags. 
28 | 29 | We introduce Jabberwocky, a Python-based [@Van_Rossum1995-ia], open-source toolkit (accessible via https://github.com/sap218/jabberwocky) that allows users to query text in an ontology-aware fashion, and to modify those ontologies based on their findings. For example, with Jabberwocky’s ``catch`` command, a user provides textual data, their chosen ontology, and a set of classes from the ontology to use as search terms. Jabberwocky cleans the input text, collects the annotated synonyms for the user-specified target classes (using “Beautiful Soup” to read the ontology’s XML structure [@Richardson2007-ba]), and then returns the key elements (e.g., lines from a corpus) which match one of the target terms, or a synonym from the ontology. The ``catch`` command will help users retrieve more matches for their chosen terms from the corpus, without users having to explicitly define all the possible synonyms or alternative spellings beforehand. 30 | 31 | Jabberwocky also helps ontology developers to iteratively improve their ontology. The ``bite`` command allows a user to provide textual data and rank the important terms by using the term frequency–inverse document frequency (tf-idf) method from “scikit-learn” [@Pedregosa2011-st], which calculates an importance metric for a term based on the frequency of its occurrence and the document size. Providing an ontology will exclude terms already described in the ontology, meaning the result of ``bite`` will be a CSV of candidate terms to potentially be added to the ontology, exported by “pandas” [@McKinney2010-xf]. Once an expert has reviewed the terms and associated them to a class in the ontology, Jabberwocky’s third command, ``arise``, will annotate the classes in the ontology, adding the newly identified synonyms. Iteratively performing multiple rounds of ``bite`` and ``arise`` can help the development and maintenance of ontologies. A user could use the ``catch`` command to confirm the modified ontology now captures more of the corpus. 32 | 33 | Jabberwocky’s test repository (see Jabberwocky repo for further instructions), shows examples of each command separately. The ‘process’ directory shows an example that combines all three commands to demonstrate an example workflow. With 24 blog posts, the first use of ``catch`` returned 11 posts with the provided keywords. The example uses ``bite`` to review the CSV of ranked terms and curated new synonyms, simply by adding the corresponding class label from the ontology. It then uses ``arise`` to add the identified synonyms into the ontology. With the second round of ``catch`` the number of posts returned for the same keywords increased to 16. This is a basic and straightforward example, but powerful. With Jabberwocky, users can efficiently search their text and gain more instances, providing new insight. 34 | 35 | Jabberwocky leverages the strength of ontologies and text for a wide range of tasks. It will be useful to users who want to manipulate textual data using controlled vocabulary from ontologies. 36 | 37 | # Acknowledgements 38 | 39 | Project was funded by the Medical Research Council (MRC) (MR/S502431/1) & supported by Health Data Research (HDR) UK (HDRUK/CFC/01). 40 | 41 | # References 42 | 43 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | An ontology is a knowledge representation framework that is machine readable. 
2 | It facilitates logical relationships between classes and allows us to standardise the formalised vocabulary within a domain.
3 | The metadata contained within an ontology is valuable - research has shown it can help address the challenges presented by unstructured text.
4 |
5 | Unstructured text can be processed, mined, and empowered by NLP tools, yet the majority of tools are not designed to consider ontologies.
6 |
7 | Jabberwocky allows users to conduct various NLP tasks whilst easily manipulating ontologies.
8 | This page provides an explanation - with a working example - of the Jabberwocky toolkit.
9 |
10 | See the [Jabberwocky](https://github.com/sap218/jabberwocky) repository for code.
11 |
12 | ---
13 |
14 | ## Functionality
15 |
16 |
17 | ### bandersnatch
18 | Extract metadata from ontology classes based on a list of tags.
19 |
20 | Users should use ontologies that are in the `OWL` RDF/XML syntax.
21 | (if not in this format, users can open their ontology in [Protégé](https://protege.stanford.edu/) and export it in the correct format)
22 |
23 | Metadata in ontologies come in various formats; below is a list of tags as an example:
24 | ```
25 | oboInOWL:hasExactSynonym
26 | oboInOWL:hasRelatedSynonym
27 | ```
28 |
29 | Words of interest (recommended to match the ontology):
30 | ```
31 | dragon
32 | water
33 | large
34 | ```
35 |
36 | ##### Output
37 | `snatch_output.txt` will include the ontology classes and corresponding metadata based on the chosen classes & tags.
38 |
39 | If users have no words of interest, then the output will include **all ontology classes** but users will still need to include a list of tags.
40 |
41 | ---
42 |
43 | ### catch
44 | Annotation of a corpus (unstructured text).
45 |
46 | Words of interest - the `bandersnatch` output can be used here:
47 | ```
48 | dragon
49 | water
50 | ocean
51 | large
52 | big
53 | ```
54 | It is **important to note**: phrases work in Jabberwocky.
55 |
56 | The corpus should be a `txt` with sentences/posts separated by new lines:
57 | ```
58 | This is post 1 in a corpus
59 | This is post 2
60 |
61 | This is post 3 - as you can see there is a gap between post 2 and 3, this is fine
62 | This is post 4 > users also don't need to worry about formatting, Jabberwocky will handle this
63 | ```
64 |
65 | ##### Output
66 | `catch_output.txt` will include the posts that were annotated.
67 | Users can choose the output type: only the annotated posts, or the posts with their corresponding tags.
68 |
69 | Moreover, users can choose to export the posts that were **NOT** annotated.
70 |
71 | ##### Plotting
72 | Users can generate a wordcloud figure from the corpus.
73 |
74 | ##### Highlighting
75 | Inspired by an old project - [cyannotator](https://github.com/sap218/cyannotator) - users can request an HTML output of posts with the annotations highlighted.
76 |
77 | ---
78 |
79 | ### bite
80 | Rank all words in a corpus in terms of importance (via the TF-IDF statistical technique).
81 |
82 | One valuable parameter adjusts the TF-IDF input so the technique measures multiple n-grams.
83 | Users can request more than unigrams: bigrams, trigrams, and more.
84 |
85 | Users can provide a list of words to remove from the corpus to avoid them being weighted/measured - the `bandersnatch` output can be used here.
86 |
87 | ##### Output
88 | `bite_output.tsv` is a dataframe with Word, Raw score, and Normalised score columns.
89 | Scores are the average TF-IDF values across posts, normalised for readability.
90 | Moreover, normalised scores that are 0 are dropped.
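To make the ranking step concrete, the sketch below shows roughly how the scores could be produced with scikit-learn and pandas. It is a simplified illustration, not the full `bite.py`: the toy posts and the output file name are placeholders, and the real script also lemmatises, removes stopwords, filters out ontology terms, and builds n-grams before this step.
```
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler

# toy corpus: in practice each post has already been cleaned, lemmatised,
# stopword-filtered and n-grammed by the earlier steps of the script
posts = [
    "mega charizard spotted near route two",
    "mega evolution on the same route",
    "small water pokemon by the lake",
]

vectorizer = TfidfVectorizer()
matrix = vectorizer.fit_transform(posts)  # rows = posts, columns = terms

scores = pd.DataFrame(matrix.toarray(), columns=vectorizer.get_feature_names_out())
summary = scores.mean(axis=0)  # average TF-IDF per term across all posts

df = pd.DataFrame({"Word": summary.index, "Raw score": summary.values})
df["Normalised score"] = MinMaxScaler().fit_transform(df[["Raw score"]]).ravel()  # rescale to 0..1
df = df[df["Normalised score"] != 0].sort_values("Normalised score", ascending=False)

df.round(3).to_csv("bite_output.tsv", sep="\t", index=False)  # same layout as the example below
```
The normalisation simply rescales the averaged scores to the 0-1 range for readability, which is why the top-ranked term always scores 1.0.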
91 | Example output:
92 | Word | Raw score | Normalised score
93 | ------- | ----------- | -----------
94 | mega | 0.078 | 1.0
95 | path | 0.06 | 0.719
96 |
97 | ##### Plotting
98 | Users can export a bar plot of the top N ranked terms (default 30).
99 |
100 | ---
101 |
102 | ### arise
103 | Updating ontology classes with new metadata.
104 |
105 | Users will provide a dataframe with three columns: the annotation, class (exact ontology match), and tag:
106 | ```
107 | annotation class tag
108 | sea water oboInOWL:hasExactSynonym
109 | mega large oboInOWL:hasRelatedSynonym
110 | https://pokemon.fandom.com/wiki/Types type oboInOWL:DbXref
111 | ```
112 | This can be derived from the `bite` output (e.g. synonyms).
113 |
114 | ##### Output
115 | `[ontology]_updated.owl` is the updated ontology.
116 |
117 | ---
118 |
119 | ### eyes
120 | Plot an ontology in web or tree style.
121 | By default, superclasses will have overlay text but users can choose whether to include it for subclasses.
122 |
123 | ##### Output
124 | `[ontology]_[plottype].png` is the plot of the ontology.
125 |
126 | ---
127 |
128 | ## Scenario
129 |
130 | You have curated unstructured text: blog posts from a social media platform (with permission of course - in this example I invented these fake conversations).
131 |
132 | Your aim is to text mine the corpus and keep only the posts covering a particular topic (or set of topics).
133 | But you realise that, although you know some words for this topic, you may be missing related/broader synonyms.
134 |
135 | This is where **ontologies are useful**. Ontologies are a controlled set of vocabulary with annotations.
136 |
137 | With your words of interest (ontology classes) you can run `bandersnatch` to extract all synonyms.
138 |
139 | With these classes and corresponding synonyms, you can annotate the corpus using `catch` - the `PhraseMatcher()` function[^spacy] tags each post in the corpus.
140 |
141 | You've chosen to have two outputs: one with the annotated posts for downstream analysis.
142 | The other has the posts that weren't annotated, so you can investigate whether there is anything valuable in them.
143 |
144 | You can proceed to use `bite` - investigating if there are any "important" terms.
145 | The statistical TF-IDF method[^tfidf] is applied and all words are ranked in terms of importance.
146 | Here you can use the whole corpus, or perhaps the `catch` output with the non-annotated posts.
147 |
148 | Whichever data you use, in the `bite` output you may notice new terms/synonyms...
149 | You can use `arise` to update your ontology classes with these new synonyms.
150 |
151 | Finally, you may want to rerun `bandersnatch` to extract an updated list of key terms.
152 | Then you can rerun `catch` for a more fruitful output for your investigations.
153 |
154 | This concludes the NLP workflow: the second round of `catch` provides more data and so a more fruitful downstream analysis.
155 |
156 | ---
157 |
158 | ## Conclusion
159 |
160 | This work was published in [JOSS](https://doi.org/10.21105/joss.02168); you can cite it as follows:
161 |
162 | ```
163 | @article{Pendleton2020,
164 | doi = {10.21105/joss.02168},
165 | url = {https://doi.org/10.21105/joss.02168},
166 | year = {2020},
167 | publisher = {The Open Journal},
168 | volume = {5},
169 | number = {51},
170 | pages = {2168},
171 | author = {Samantha C. Pendleton and Georgios V.
Gkoutos}, 172 | title = {Jabberwocky: an ontology-aware toolkit for manipulating text}, 173 | journal = {Journal of Open Source Software} 174 | } 175 | ``` 176 | 177 | This repository was inspired by (and the inspiration of) the [OcIMIDo](https://doi.org/10.1016/j.compbiomed.2021.104542) project. 178 | 179 | [^spacy]: using [spaCy](https://spacy.io/api/phrasematcher) 180 | [^tfidf]: Term frequency inverse document frequency (TF-IDF) 181 | 182 | *** 183 | 184 | End of page 185 | -------------------------------------------------------------------------------- /bite/bite.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @date: 2024 5 | @author: Samantha C Pendleton 6 | @description: conducts TF-IDF 7 | @GitHub: github.com/sap218/jabberwocky 8 | 9 | @useful links: 10 | # https://python-charts.com/matplotlib/styles/ 11 | """ 12 | 13 | import sys 14 | import re 15 | import time 16 | import pandas as pd 17 | import matplotlib.pyplot as plt 18 | 19 | from sklearn.feature_extraction.text import TfidfVectorizer 20 | from sklearn.preprocessing import MinMaxScaler 21 | 22 | import spacy 23 | nlp = spacy.load("en_core_web_sm") 24 | 25 | from nltk import ngrams 26 | 27 | from params_bite import * 28 | 29 | #################################################### 30 | 31 | from highlevel import * 32 | 33 | ''' stopWords ''' 34 | if filter_level == "none": stopWords = stopWords[0] 35 | elif filter_level == "light": stopWords = stopWords[1] 36 | elif filter_level == "heavy": stopWords = stopWords[2] 37 | 38 | #stopWords = [cleantext(x.lower()) for x in stopWords] 39 | 40 | stopWords_lemma = [] 41 | for word in stopWords: 42 | word = cleantext(word.lower()) 43 | doc = nlp(word) 44 | doc_lemma = " ".join([token.lemma_ for token in doc]) 45 | stopWords_lemma.append(doc_lemma) 46 | stopWords_lemma_filt = list(filter(None, stopWords_lemma)) 47 | stopWords_lemma_filt_flat = [word for phrase in stopWords_lemma_filt for word in phrase.split()] 48 | 49 | stopWords = list(set(stopWords_lemma_filt_flat)) 50 | del word, doc, doc_lemma, stopWords_lemma, stopWords_lemma_filt, stopWords_lemma_filt_flat 51 | 52 | #################################################### 53 | #################################################### 54 | 55 | if len(concepts_to_remove) > 0: 56 | try: 57 | words_of_interest = [] 58 | with open("%s.txt" % concepts_to_remove, "r") as t: 59 | for word in t: 60 | words_of_interest.append(word.strip("\n").strip(" ")) 61 | del t, word 62 | except FileNotFoundError: 63 | sys.exit("User attempted to provide a list of concepts to remove from TF-IDF - unsuccessful") 64 | else: words_of_interest = ["nowordstofilter"] 65 | 66 | words_of_interest = list(filter(None, words_of_interest)) 67 | 68 | #################################################### 69 | 70 | words_of_interest_clean_lemma_stpwrd = [] 71 | 72 | # preprocess concepts: Lemmatize & stopWords 73 | for concept in words_of_interest: 74 | concept = cleantext(concept.lower()) 75 | 76 | doc = nlp(concept) 77 | 78 | ## lemma 79 | doc_lemma = [token.lemma_ for token in doc] 80 | ## stopwords 81 | doc_lemma_stpwrd = [remove_stop_words(text, stopWords) for text in doc_lemma] 82 | doc_lemma_stpwrd = list(filter(None, doc_lemma_stpwrd)) 83 | 84 | if doc_lemma_stpwrd: 85 | words_of_interest_clean_lemma_stpwrd.append(" ".join(doc_lemma_stpwrd).lower()) 86 | 87 | del concept, doc, doc_lemma, doc_lemma_stpwrd 88 | 89 | #################################################### 
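# --- corpus loading & preprocessing (next blocks) ---
# the corpus text file is read one post per line (blank lines are dropped),
# then each post is cleaned, lemmatised and stopword-filtered before the TF-IDF step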
90 | #################################################### 91 | 92 | list_of_posts = [] 93 | 94 | with open("%s.txt" % corpus, "r") as t: 95 | for post in t: 96 | list_of_posts.append(post.strip("\n").strip(" ")) 97 | del t, post 98 | list_of_posts = list(filter(None, list_of_posts)) 99 | 100 | #################################################### 101 | 102 | list_of_posts_clean_lemma_stpwrd = [] 103 | 104 | for post in list_of_posts: 105 | post = cleantext(post.lower()) 106 | 107 | doc = nlp(post) 108 | 109 | ## lemma 110 | doc_lemma = [token.lemma_ for token in doc] 111 | ## stopwords 112 | doc_lemma_stpwrd = [remove_stop_words(text, stopWords) for text in doc_lemma] 113 | doc_lemma_stpwrd = list(filter(None, doc_lemma_stpwrd)) 114 | 115 | list_of_posts_clean_lemma_stpwrd.append(" ".join(doc_lemma_stpwrd).lower()) 116 | 117 | del post,doc,doc_lemma,doc_lemma_stpwrd 118 | 119 | #################################################### 120 | #################################################### 121 | 122 | words_of_interest_clean_lemma_stpwrd.append("evolve") 123 | words_of_interest_clean_lemma_stpwrd.append("team rocket") 124 | 125 | list_of_posts_clean_lemma_stpwrd.append("evolve") 126 | list_of_posts.append("evolve") 127 | 128 | #################################################### 129 | #################################################### 130 | 131 | def remove_phrases(sentences, phrases): 132 | cleaned_sentences = [] 133 | for sentence in sentences: 134 | for phrase in phrases: 135 | sentence = sentence.replace(phrase, '') 136 | sentence = re.sub(' +', ' ', sentence).strip() # remove double whitespace 137 | cleaned_sentences.append(sentence) 138 | return cleaned_sentences 139 | 140 | list_of_posts_clean_lemma_stpwrd_filtered = remove_phrases(list_of_posts_clean_lemma_stpwrd, words_of_interest_clean_lemma_stpwrd) 141 | 142 | #################################################### 143 | #################################################### 144 | 145 | gram_limit = ngram_count.copy() 146 | #gram_limit = [x+1 for x in range(ngram_count)] 147 | 148 | posts_cln_lmm_stpwrd_flt_ngrm = {} 149 | 150 | x = 0 151 | for post in list_of_posts_clean_lemma_stpwrd_filtered: 152 | ngram_list = [] 153 | for n in gram_limit: 154 | ngrammed = ngrams(post.split(), n) 155 | for gram in ngrammed: 156 | ngram_list.append( "_".join(gram) ) 157 | posts_cln_lmm_stpwrd_flt_ngrm[x] = [ngram_list, post, list_of_posts[x]] 158 | x = x + 1 159 | del x, post, ngram_list, ngrammed 160 | 161 | first_index_values = [" ".join(values[0]) for values in posts_cln_lmm_stpwrd_flt_ngrm.values()] 162 | 163 | #################################################### 164 | 165 | start_time = time.time() 166 | 167 | tfidf_vectorizer = TfidfVectorizer() 168 | tfidf_matrix = tfidf_vectorizer.fit_transform(first_index_values) 169 | tfidf_df = pd.DataFrame(data=tfidf_matrix.toarray(), columns=tfidf_vectorizer.get_feature_names_out()) 170 | 171 | end_time = time.time() - start_time 172 | end_time = str(round(end_time, 3)) 173 | print( "Seconds taken to run tf-idf: %s" % end_time) 174 | del start_time, tfidf_matrix, tfidf_vectorizer, first_index_values 175 | 176 | #################################################### 177 | 178 | tfidf_df['Sentence'] = list_of_posts_clean_lemma_stpwrd_filtered # col to show original sentences 179 | tfidf_df = tfidf_df[['Sentence'] + [col for col in tfidf_df.columns if col != 'Sentence']] # sentence first col 180 | 181 | #################################################### 182 | 183 | summary_scores = 
tfidf_df.drop(columns=['Sentence']).agg('mean', axis=0) 184 | tfidf_df_sum = pd.DataFrame({'Word': summary_scores.index, 'Raw score': summary_scores.values}) 185 | del summary_scores, tfidf_df 186 | 187 | #################################################### 188 | 189 | scaler = MinMaxScaler() 190 | tfidf_df_sum['Normalised score'] = scaler.fit_transform(tfidf_df_sum[['Raw score']]) 191 | tfidf_df_sum = tfidf_df_sum.sort_values("Normalised score", ascending=False) 192 | del scaler 193 | 194 | #################################################### 195 | 196 | df = tfidf_df_sum.copy() 197 | df = df[df['Normalised score'] != 0] 198 | 199 | df['Raw score'] = df['Raw score'].round(decimals=3) 200 | df['Normalised score'] = df['Normalised score'].round(decimals=3) 201 | 202 | #################################################### 203 | 204 | # IDEA add post for users to extrapolate/add context 205 | 206 | #df['Post'] = df['Word'].apply(lambda x: [v[2] for v in posts_cln_lmm_stpwrd_flt_ngrm.values() if x in v[0]]) 207 | #df['Post'] = [list(set(x)) for x in df['Post'] ] 208 | #dfexplode = df.explode('Post') 209 | 210 | #################################################### 211 | 212 | df.to_csv('%s.tsv' % output_name, index=False, sep="\t") 213 | 214 | #################################################### 215 | #################################################### 216 | 217 | statistics = [ 218 | "time taken to run tf-idf: %s" % end_time, 219 | "tf-idf raw df length: %s" % str(len(tfidf_df_sum)), 220 | "tf-idf adj. df length: %s" % str(len(df)) 221 | ] 222 | del end_time, tfidf_df_sum 223 | 224 | with open('%s.txt' % stats_output_name, 'w') as t: 225 | for word in statistics: 226 | t.write(word + '\n') 227 | del t,word 228 | 229 | #################################################### 230 | 231 | if graph: 232 | plt.style.use("seaborn-poster") 233 | fig = plt.figure() 234 | ax = fig.add_axes([0,0,1,1]) 235 | ax.bar(df["Word"][:limit],df["Normalised score"][:limit], color=cm) 236 | plt.xticks(rotation=90) 237 | ax.set_ylabel('Average score (normalised)') 238 | ax.set_xlabel('Terms') 239 | ax.set_title("Bar plot of top %s TF-IDF ranked terms" % limit) 240 | plt.savefig('%s.png' % plot_output_name, bbox_inches='tight') 241 | del ax, fig 242 | 243 | #################################################### 244 | 245 | # End of script 246 | -------------------------------------------------------------------------------- /catch/catch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @date: 2024 5 | @author: Samantha C Pendleton 6 | @description: with words of interest, grep a text file 7 | @GitHub: github.com/sap218/jabberwocky 8 | 9 | @useful links: 10 | # https://matplotlib.org/stable/users/explain/colors/colormaps.html 11 | """ 12 | 13 | import time 14 | start_script = time.time() 15 | 16 | from datetime import datetime 17 | now = datetime.now() 18 | output_timestamp = now.strftime("%Y%m%d-%H%M") 19 | del now 20 | 21 | import sys 22 | 23 | import spacy 24 | from spacy.matcher import PhraseMatcher 25 | nlp = spacy.load("en_core_web_sm") 26 | 27 | from wordcloud import WordCloud 28 | import matplotlib.pyplot as plt 29 | 30 | from params_catch import * 31 | 32 | #################################################### 33 | 34 | if is_this_a_test: 35 | file_corpus = "../catch/test/social_media_posts.txt" 36 | file_words_of_interest = "../bandersnatch/test/snatch_output.txt" 37 | 38 | dirloc = "test/" 39 | output_name_catch 
= "%sannotation_%s.txt" % (dirloc, output_format) 40 | output_name_log = "%slog.txt" % (dirloc) 41 | if plotWORDCLOUD: output_name_wordcloud = "%swordcloud.png" % (dirloc) 42 | if plotCYANNOTATOR: output_name_cyannotator = "%scyannotator.html" % (dirloc) 43 | 44 | else: 45 | dirloc = "output/" 46 | output_name_catch = "%sannotation_%s_%s_%s.txt" % (dirloc, output_format, output_name, output_timestamp) 47 | output_name_log = "%slog_%s_%s.txt" % (dirloc, output_name, output_timestamp) 48 | if plotWORDCLOUD: output_name_wordcloud = "%swordcloud_%s_%s.png" % (dirloc, output_name, output_timestamp) 49 | if plotCYANNOTATOR: output_name_cyannotator = "%scyannotator_%s_%s.html" % (dirloc, output_name, output_timestamp) 50 | 51 | del dirloc, output_name 52 | 53 | #################################################### 54 | 55 | from highlevel import * 56 | 57 | ''' stopWords ''' 58 | if filter_level == "none": stopWords = stopWords[0] 59 | elif filter_level == "light": stopWords = stopWords[1] 60 | elif filter_level == "heavy": stopWords = stopWords[2] 61 | 62 | stopWords_lemma = [] 63 | stopWordsList = [] 64 | for word in stopWords: 65 | stopWords_lemma.append(clean_lower_lemma(word, "stopwords", stopWordsList)) 66 | 67 | stopWords_lemma_flat = [word for phrase in stopWords_lemma for word in phrase.split()] 68 | stopWordsList = list(set(filter(None, stopWords_lemma_flat))) 69 | 70 | del word, stopWords, stopWords_lemma, stopWords_lemma_flat#, doc 71 | 72 | #################################################### 73 | #################################################### 74 | 75 | try: 76 | list_of_posts = [] 77 | with open("%s" % file_corpus, "r") as t: 78 | for line in t: 79 | list_of_posts.append(line.strip("\n").strip(" ")) 80 | del file_corpus, t, line 81 | except FileNotFoundError: 82 | sys.exit("Cannot find [corpus] text file") 83 | 84 | list_of_posts = list(filter(None, list_of_posts)) # remove empty lines 85 | 86 | post_stats = [len(x.split()) for x in list_of_posts] # word count per line 87 | 88 | #################################################### 89 | #################################################### 90 | 91 | if len(file_words_of_interest) > 0: 92 | try: 93 | words_of_interest = [] 94 | with open("%s" % file_words_of_interest, "r") as t: 95 | for line in t: 96 | words_of_interest.append(line.strip("\n").strip(" ")) 97 | del t, line 98 | except FileNotFoundError: 99 | #sys.exit("User attempted to provide a list of terms for annotation - unsuccessful") 100 | sys.exit("Cannot find [words of interest] file") 101 | else: words_of_interest = [] #["nowordstofilter"] 102 | 103 | words_of_interest = list(filter(None, words_of_interest)) # remove empty lines 104 | 105 | #################################################### 106 | 107 | statistics = [ 108 | "is this a test: %s" % str(is_this_a_test), 109 | "stopword filter level: %s" % filter_level, 110 | "concepts count: %s" % len(words_of_interest), 111 | "post count: %s" % len(list_of_posts), 112 | "average word count: %s" % (sum(post_stats)/len(post_stats)), 113 | ] 114 | del post_stats, filter_level 115 | 116 | if not words_of_interest: words_of_interest = ["PlaceholderAsThereAreNoWordsToFilter"] 117 | 118 | #################################################### 119 | #################################################### 120 | 121 | words_of_interest_formatted = [] 122 | concept_patterns = [] # for matcher 123 | 124 | # preprocess concepts: Lemmatize & stopWords 125 | for concept in words_of_interest: 126 | doc_lemma_stpwrd_filter = 
clean_lower_lemma(concept, "text", stopWordsList) 127 | 128 | if doc_lemma_stpwrd_filter: 129 | concept_patterns.append(nlp(" ".join(doc_lemma_stpwrd_filter).lower())) 130 | words_of_interest_formatted.append(" ".join(doc_lemma_stpwrd_filter).lower()) 131 | del concept 132 | 133 | matcher = PhraseMatcher(nlp.vocab) # initialize phrase matcher 134 | matcher.add("Concepts", None, *concept_patterns) # convert concepts into patterns 135 | del concept_patterns 136 | 137 | #################################################### 138 | #################################################### 139 | 140 | doc_lemma_stpwrd_filter_output = [] 141 | list_of_posts_formatted = [] 142 | 143 | for post in list_of_posts: 144 | doc_lemma_stpwrd_filter = clean_lower_lemma(post, "text", stopWordsList) 145 | 146 | list_of_posts_formatted.append(" ".join(doc_lemma_stpwrd_filter).lower()) 147 | 148 | doc_lemma_stpwrd_filter_output.append(doc_lemma_stpwrd_filter) 149 | 150 | del post, doc_lemma_stpwrd_filter 151 | 152 | #################################################### 153 | 154 | if plotWORDCLOUD: 155 | if not colormapWC: colormapWC = "Set3" 156 | 157 | wc = WordCloud( 158 | width = 2048, height = 1080, 159 | 160 | background_color='white', 161 | colormap = colormapWC, 162 | contour_color='black', contour_width=10, 163 | 164 | max_words=30, min_font_size=10, 165 | #stopwords = ['word'], # words don't want to plot 166 | collocations = True, # words joined together 167 | normalize_plurals=False, 168 | 169 | prefer_horizontal=0.8,scale=2, 170 | random_state=123 171 | ).generate(" ".join(list_of_posts_formatted)) 172 | 173 | plt.figure(figsize=(10, 5)) 174 | plt.axis("off") 175 | plt.tight_layout(pad = 0) 176 | plt.imshow(wc, interpolation="bilinear") 177 | plt.savefig('%s' % output_name_wordcloud) 178 | 179 | del plotWORDCLOUD, colormapWC, wc, output_name_wordcloud 180 | 181 | #################################################### 182 | 183 | # cyan with soft glow for highlighting, can use :cyan for original 184 | if plotCYANNOTATOR: 185 | if not highlightcolour: highlightcolour = "#00bcd4" 186 | cyancolour = ["" % highlightcolour, 187 | ""] # plain 188 | cyannotator_text = [] 189 | 190 | #################################################### 191 | 192 | start_annotation = time.time() 193 | 194 | matched_output_list = [] 195 | 196 | y = 0 197 | for post in doc_lemma_stpwrd_filter_output: 198 | print("Sentence iteration ", y+1, " out of ", len(list_of_posts)) 199 | 200 | post = " ".join(post) 201 | 202 | doc = nlp(post) 203 | matches = matcher(doc) 204 | 205 | if matches: 206 | matched_concepts = set() 207 | #if cyannotator: highlighting = " ".join(doc_lemma_stpwrd_filter).lower() 208 | 209 | cyaned = [] 210 | for match_id, start, end in matches: 211 | matched_span = doc[start:end] 212 | matched_concepts.add(matched_span.text) 213 | 214 | if plotCYANNOTATOR: 215 | highlighting = re.sub(r'\b%s\b' % re.escape(matched_span.text), 216 | (cyancolour[0] + matched_span.text + cyancolour[1]), post) 217 | cyaned.append(highlighting) 218 | if plotCYANNOTATOR: cyannotator_text.append(cyaned[-1]) 219 | 220 | matched_output_list.append([ list(matched_concepts), list_of_posts[y] ]) 221 | 222 | del matched_concepts, match_id, start, end, matched_span, cyaned 223 | 224 | else: 225 | matched_output_list.append([ "NO ANNOTATION", list_of_posts[y] ]) 226 | 227 | y = y + 1 228 | 229 | del y, post, doc, matches 230 | 231 | #################################################### 232 | 233 | end_annotation = time.time() - start_annotation 234 | 
end_annotation = str(round(end_annotation, 2)) 235 | statistics.append("time taken to annotate (seconds): %s" % end_annotation) 236 | del start_annotation, end_annotation 237 | 238 | #################################################### 239 | #################################################### 240 | 241 | matched_output_list_output = [] 242 | 243 | for x,content in enumerate(matched_output_list): 244 | 245 | if output_format == "wtags": 246 | if content[0] != "NO ANNOTATION": matched_output_list_output.append( "%s # %s" % (sorted(content[0]),content[1]) ) 247 | elif output_format == "grep": 248 | if content[0] != "NO ANNOTATION": matched_output_list_output.append(content[1]) 249 | elif output_format == "invertedgrep": 250 | if content[0] == "NO ANNOTATION": matched_output_list_output.append(content[1]) 251 | 252 | del x, content, output_format 253 | 254 | if not matched_output_list_output: 255 | matched_output_list_output.append("NO ANNOTATIONS") 256 | if not file_words_of_interest: 257 | statistics.append("NO ANNOTATIONS - this is due to an empty [words of interest] file") 258 | else: statistics.append("NO ANNOTATIONS") 259 | 260 | #################################################### 261 | 262 | with open('%s' % output_name_catch, 'w') as t: 263 | for word in matched_output_list_output: 264 | t.write(word + '\n') 265 | del t, word, output_name_catch 266 | 267 | #################################################### 268 | 269 | if plotCYANNOTATOR: 270 | if not cyannotator_text: 271 | cyannotator_text = ["NO ANNOTATIONS"] 272 | 273 | html_content = "" 274 | html_content += "
".join(cyannotator_text) 275 | html_content += "" 276 | 277 | with open('%s' % output_name_cyannotator, 'w') as f: 278 | f.write(html_content) 279 | del f, output_name_cyannotator 280 | 281 | #################################################### 282 | 283 | end_script = time.time() - start_script 284 | end_script = str(round(end_script, 2)) 285 | statistics.append("time taken to run script (seconds): %s" % end_script) 286 | del start_script, end_script 287 | 288 | with open(output_name_log, 'w') as t: 289 | for word in statistics: 290 | t.write(word + '\n') 291 | del t, word, output_name_log 292 | 293 | #################################################### 294 | 295 | # End of script 296 | -------------------------------------------------------------------------------- /highlevel.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @date: 2024 5 | @author: Samantha C Pendleton 6 | @description: high-level variables 7 | @GitHub: github.com/sap218/jabberwocky 8 | 9 | @useful links: 10 | # https://gist.github.com/sebleier/554280 11 | """ 12 | 13 | import re 14 | import contractions 15 | 16 | import spacy 17 | nlp = spacy.load("en_core_web_sm") 18 | 19 | def cleantext(post): 20 | post = contractions.fix(post) 21 | post = re.sub(' +', ' ', post) # double spaces 22 | post = re.sub("[^A-Za-z0-9']+", " ", post).replace("'", " ").strip() # consider " 23 | return post 24 | 25 | #################################################### 26 | 27 | stopWords = [ 28 | [''], 29 | ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't", 'wouldn', "wouldn't"], 30 | ["0o", "0s", "3a", "3b", "3d", "6b", "6o", "a", "a1", "a2", "a3", "a4", "ab", "able", "about", "above", "abst", "ac", "accordance", "according", "accordingly", "across", "act", "actually", "ad", "added", "adj", "ae", "af", "affected", "affecting", "affects", "after", "afterwards", "ag", "again", "against", "ah", "ain", "ain't", "aj", "al", "all", "allow", "allows", "almost", "alone", "along", "already", "also", "although", "always", "am", "among", "amongst", "amoungst", "amount", "an", 
"and", "announce", "another", "any", "anybody", "anyhow", "anymore", "anyone", "anything", "anyway", "anyways", "anywhere", "ao", "ap", "apart", "apparently", "appear", "appreciate", "appropriate", "approximately", "ar", "are", "aren", "arent", "aren't", "arise", "around", "as", "a's", "aside", "ask", "asking", "associated", "at", "au", "auth", "av", "available", "aw", "away", "awfully", "ax", "ay", "az", "b", "b1", "b2", "b3", "ba", "back", "bc", "bd", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "begin", "beginning", "beginnings", "begins", "behind", "being", "believe", "below", "beside", "besides", "best", "better", "between", "beyond", "bi", "bill", "biol", "bj", "bk", "bl", "bn", "both", "bottom", "bp", "br", "brief", "briefly", "bs", "bt", "bu", "but", "bx", "by", "c", "c1", "c2", "c3", "ca", "call", "came", "can", "cannot", "cant", "can't", "cause", "causes", "cc", "cd", "ce", "certain", "certainly", "cf", "cg", "ch", "changes", "ci", "cit", "cj", "cl", "clearly", "cm", "c'mon", "cn", "co", "com", "come", "comes", "con", "concerning", "consequently", "consider", "considering", "contain", "containing", "contains", "corresponding", "could", "couldn", "couldnt", "couldn't", "course", "cp", "cq", "cr", "cry", "cs", "c's", "ct", "cu", "currently", "cv", "cx", "cy", "cz", "d", "d2", "da", "date", "dc", "dd", "de", "definitely", "describe", "described", "despite", "detail", "df", "di", "did", "didn", "didn't", "different", "dj", "dk", "dl", "do", "does", "doesn", "doesn't", "doing", "don", "done", "don't", "down", "downwards", "dp", "dr", "ds", "dt", "du", "due", "during", "dx", "dy", "e", "e2", "e3", "ea", "each", "ec", "ed", "edu", "ee", "ef", "effect", "eg", "ei", "eight", "eighty", "either", "ej", "el", "eleven", "else", "elsewhere", "em", "empty", "en", "end", "ending", "enough", "entirely", "eo", "ep", "eq", "er", "es", "especially", "est", "et", "et-al", "etc", "eu", "ev", "even", "ever", "every", "everybody", "everyone", "everything", "everywhere", "ex", "exactly", "example", "except", "ey", "f", "f2", "fa", "far", "fc", "few", "ff", "fi", "fifteen", "fifth", "fify", "fill", "find", "fire", "first", "five", "fix", "fj", "fl", "fn", "fo", "followed", "following", "follows", "for", "former", "formerly", "forth", "forty", "found", "four", "fr", "from", "front", "fs", "ft", "fu", "full", "further", "furthermore", "fy", "g", "ga", "gave", "ge", "get", "gets", "getting", "gi", "give", "given", "gives", "giving", "gj", "gl", "go", "goes", "going", "gone", "got", "gotten", "gr", "greetings", "gs", "gy", "h", "h2", "h3", "had", "hadn", "hadn't", "happens", "hardly", "has", "hasn", "hasnt", "hasn't", "have", "haven", "haven't", "having", "he", "hed", "he'd", "he'll", "hello", "help", "hence", "her", "here", "hereafter", "hereby", "herein", "heres", "here's", "hereupon", "hers", "herself", "hes", "he's", "hh", "hi", "hid", "him", "himself", "his", "hither", "hj", "ho", "home", "hopefully", "how", "howbeit", "however", "how's", "hr", "hs", "http", "hu", "hundred", "hy", "i", "i2", "i3", "i4", "i6", "i7", "i8", "ia", "ib", "ibid", "ic", "id", "i'd", "ie", "if", "ig", "ignored", "ih", "ii", "ij", "il", "i'll", "im", "i'm", "immediate", "immediately", "importance", "important", "in", "inasmuch", "inc", "indeed", "index", "indicate", "indicated", "indicates", "information", "inner", "insofar", "instead", "interest", "into", "invention", "inward", "io", "ip", "iq", "ir", "is", "isn", "isn't", "it", "itd", "it'd", "it'll", "its", "it's", "itself", "iv", 
"i've", "ix", "iy", "iz", "j", "jj", "jr", "js", "jt", "ju", "just", "k", "ke", "keep", "keeps", "kept", "kg", "kj", "km", "know", "known", "knows", "ko", "l", "l2", "la", "largely", "last", "lately", "later", "latter", "latterly", "lb", "lc", "le", "least", "les", "less", "lest", "let", "lets", "let's", "lf", "like", "liked", "likely", "line", "little", "lj", "ll", "ll", "ln", "lo", "look", "looking", "looks", "los", "lr", "ls", "lt", "ltd", "m", "m2", "ma", "made", "mainly", "make", "makes", "many", "may", "maybe", "me", "mean", "means", "meantime", "meanwhile", "merely", "mg", "might", "mightn", "mightn't", "mill", "million", "mine", "miss", "ml", "mn", "mo", "more", "moreover", "most", "mostly", "move", "mr", "mrs", "ms", "mt", "mu", "much", "mug", "must", "mustn", "mustn't", "my", "myself", "n", "n2", "na", "name", "namely", "nay", "nc", "nd", "ne", "near", "nearly", "necessarily", "necessary", "need", "needn", "needn't", "needs", "neither", "never", "nevertheless", "new", "next", "ng", "ni", "nine", "ninety", "nj", "nl", "nn", "no", "nobody", "non", "none", "nonetheless", "noone", "nor", "normally", "nos", "not", "noted", "nothing", "novel", "now", "nowhere", "nr", "ns", "nt", "ny", "o", "oa", "ob", "obtain", "obtained", "obviously", "oc", "od", "of", "off", "often", "og", "oh", "oi", "oj", "ok", "okay", "ol", "old", "om", "omitted", "on", "once", "one", "ones", "only", "onto", "oo", "op", "oq", "or", "ord", "os", "ot", "other", "others", "otherwise", "ou", "ought", "our", "ours", "ourselves", "out", "outside", "over", "overall", "ow", "owing", "own", "ox", "oz", "p", "p1", "p2", "p3", "page", "pagecount", "pages", "par", "part", "particular", "particularly", "pas", "past", "pc", "pd", "pe", "per", "perhaps", "pf", "ph", "pi", "pj", "pk", "pl", "placed", "please", "plus", "pm", "pn", "po", "poorly", "possible", "possibly", "potentially", "pp", "pq", "pr", "predominantly", "present", "presumably", "previously", "primarily", "probably", "promptly", "proud", "provides", "ps", "pt", "pu", "put", "py", "q", "qj", "qu", "que", "quickly", "quite", "qv", "r", "r2", "ra", "ran", "rather", "rc", "rd", "re", "readily", "really", "reasonably", "recent", "recently", "ref", "refs", "regarding", "regardless", "regards", "related", "relatively", "research", "research-articl", "respectively", "resulted", "resulting", "results", "rf", "rh", "ri", "right", "rj", "rl", "rm", "rn", "ro", "rq", "rr", "rs", "rt", "ru", "run", "rv", "ry", "s", "s2", "sa", "said", "same", "saw", "say", "saying", "says", "sc", "sd", "se", "sec", "second", "secondly", "section", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "self", "selves", "sensible", "sent", "serious", "seriously", "seven", "several", "sf", "shall", "shan", "shan't", "she", "shed", "she'd", "she'll", "shes", "she's", "should", "shouldn", "shouldn't", "should've", "show", "showed", "shown", "showns", "shows", "si", "side", "significant", "significantly", "similar", "similarly", "since", "sincere", "six", "sixty", "sj", "sl", "slightly", "sm", "sn", "so", "some", "somebody", "somehow", "someone", "somethan", "something", "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry", "sp", "specifically", "specified", "specify", "specifying", "sq", "sr", "ss", "st", "still", "stop", "strongly", "sub", "substantially", "successfully", "such", "sufficiently", "suggest", "sup", "sure", "sy", "system", "sz", "t", "t1", "t2", "t3", "take", "taken", "taking", "tb", "tc", "td", "te", "tell", "ten", "tends", "tf", "th", "than", "thank", "thanks", 
"thanx", "that", "that'll", "thats", "that's", "that've", "the", "their", "theirs", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "thered", "therefore", "therein", "there'll", "thereof", "therere", "theres", "there's", "thereto", "thereupon", "there've", "these", "they", "theyd", "they'd", "they'll", "theyre", "they're", "they've", "thickv", "thin", "think", "third", "this", "thorough", "thoroughly", "those", "thou", "though", "thoughh", "thousand", "three", "throug", "through", "throughout", "thru", "thus", "ti", "til", "tip", "tj", "tl", "tm", "tn", "to", "together", "too", "took", "top", "toward", "towards", "tp", "tq", "tr", "tried", "tries", "truly", "try", "trying", "ts", "t's", "tt", "tv", "twelve", "twenty", "twice", "two", "tx", "u", "u201d", "ue", "ui", "uj", "uk", "um", "un", "under", "unfortunately", "unless", "unlike", "unlikely", "until", "unto", "uo", "up", "upon", "ups", "ur", "us", "use", "used", "useful", "usefully", "usefulness", "uses", "using", "usually", "ut", "v", "va", "value", "various", "vd", "ve", "ve", "very", "via", "viz", "vj", "vo", "vol", "vols", "volumtype", "vq", "vs", "vt", "vu", "w", "wa", "want", "wants", "was", "wasn", "wasnt", "wasn't", "way", "we", "wed", "we'd", "welcome", "well", "we'll", "well-b", "went", "were", "we're", "weren", "werent", "weren't", "we've", "what", "whatever", "what'll", "whats", "what's", "when", "whence", "whenever", "when's", "where", "whereafter", "whereas", "whereby", "wherein", "wheres", "where's", "whereupon", "wherever", "whether", "which", "while", "whim", "whither", "who", "whod", "whoever", "whole", "who'll", "whom", "whomever", "whos", "who's", "whose", "why", "why's", "wi", "widely", "will", "willing", "wish", "with", "within", "without", "wo", "won", "wonder", "wont", "won't", "words", "world", "would", "wouldn", "wouldnt", "wouldn't", "www", "x", "x1", "x2", "x3", "xf", "xi", "xj", "xk", "xl", "xn", "xo", "xs", "xt", "xv", "xx", "y", "y2", "yes", "yet", "yj", "yl", "you", "youd", "you'd", "you'll", "your", "youre", "you're", "yours", "yourself", "yourselves", "you've", "yr", "ys", "yt", "z", "zero", "zi", "zz"] 31 | ] 32 | 33 | def remove_stop_words(text, stopWordsList): 34 | #return ' '.join(word for word in text.split() if word.lower() not in stopWordsList) 35 | return ' '.join(word for word in text.split() if word not in stopWordsList) 36 | 37 | #################################################### 38 | 39 | def clean_lower_lemma(iteration, itype, stopWordsList): 40 | iteration = cleantext(iteration.lower()) 41 | doc = nlp(iteration) 42 | 43 | if itype == "stopwords": 44 | doc_lemma = " ".join([token.lemma_.lower() for token in doc]) 45 | elif itype == "text": 46 | doc_lemma = [token.lemma_.lower() for token in doc] 47 | doc_lemma_stopwords = [remove_stop_words(text, stopWordsList) for text in doc_lemma] 48 | doc_lemma_stopwords_filter = list(filter(None, doc_lemma_stopwords)) 49 | doc_lemma = doc_lemma_stopwords_filter.copy() 50 | 51 | return doc_lemma 52 | 53 | #################################################### 54 | 55 | # End of script 56 | -------------------------------------------------------------------------------- /arise/test/pocketmonsters_updated.owl: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Brief Pokemon Ontology 5 | 6 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 26 | 27 | 28 | evolvegeneration 29 | 30 | 31 | 32 | https://pokemon.fandom.com/wiki/Typesbreedtype 33 | 34 | 35 | 36 | size 37 | 38 | 39 | 40 | 41 | small 42 | 43 
| 44 | 45 | 46 | medium 47 | 48 | 49 | 50 | mega 51 | large 52 | 53 | 54 | 55 | pathroute 56 | 57 | 58 | 59 | 60 | generation 1 61 | gen 1 62 | gen one 63 | generation one 64 | 65 | 66 | 67 | 68 | generation 2 69 | gen 2 70 | gen two 71 | generation two 72 | 73 | 74 | 75 | 76 | generation 3 77 | gen 3 78 | gen three 79 | generation three 80 | 81 | 82 | 83 | 84 | generation 4 85 | gen 4 86 | gen four 87 | generation four 88 | 89 | 90 | 91 | 92 | generation 5 93 | gen 5 94 | gen five 95 | generation five 96 | 97 | 98 | 99 | 100 | generation 6 101 | gen 6 102 | gen six 103 | generation six 104 | 105 | 106 | 107 | 108 | normal 109 | 110 | 111 | 112 | 113 | grass 114 | 115 | 116 | 117 | 118 | water 119 | 120 | 121 | 122 | 123 | fire 124 | 125 | 126 | 127 | 128 | electric 129 | 130 | 131 | 132 | 133 | ground 134 | 135 | 136 | 137 | 138 | rock 139 | 140 | 141 | 142 | airflew 143 | flying 144 | 145 | 146 | 147 | 148 | bug 149 | 150 | 151 | 152 | 153 | poison 154 | 155 | 156 | 157 | 158 | fighting 159 | 160 | 161 | 162 | 163 | psychic 164 | 165 | 166 | 167 | 168 | ghost 169 | 170 | 171 | 172 | 173 | dark 174 | 175 | 176 | 177 | 178 | ice 179 | 180 | 181 | 182 | 183 | steel 184 | 185 | 186 | 187 | 188 | dragon 189 | 190 | 191 | 192 | 193 | fairy 194 | 195 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | -------------------------------------------------------------------------------- /bandersnatch/test/pocketmonsters.owl: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | Brief Pokemon Ontology 13 | 14 | 15 | 16 | 17 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | generation 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | type 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | size 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | small 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | medium 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | large 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | route 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | generation 1 121 | gen 1 122 | gen one 123 | generation one 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | generation 2 133 | gen 2 134 | gen two 135 | generation two 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | generation 3 145 | gen 3 146 | gen three 147 | generation three 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | generation 4 157 | gen 4 158 | gen four 159 | generation four 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | generation 5 169 | gen 5 170 | gen five 171 | generation five 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | generation 6 181 | gen 6 182 | gen six 183 | generation six 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | normal 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | grass 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | water 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | fire 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | electric 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | ground 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | rock 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | flying 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | bug 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | poison 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | fighting 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | psychic 292 | 293 | 
294 | 295 | 296 | 297 | 298 | 299 | 300 | ghost 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | dark 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | ice 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | steel 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | dragon 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | fairy 346 | 347 | 348 | 349 | 350 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | --------------------------------------------------------------------------------