├── .gitignore
├── README.md
├── jseg
├── Emodet
│ ├── __init__.py
│ ├── emo2.json
│ ├── emo3.json
│ ├── emo4.json
│ ├── emoall.txt
│ ├── emodata.json
│ ├── emodet.py
│ └── emodet2.py
├── __init__.py
├── brill_tagger.pkl
├── dict.txt
├── emoall.txt
├── emodet.py
├── jieba.py
├── prob
│ ├── emisP.json
│ ├── initP.json
│ └── tranP.json
├── ptt_encyc.txt
└── test.py
├── pip.txt
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | # Created by https://www.gitignore.io/api/vim,osx,linux,python
3 |
4 | ### Linux ###
5 | *~
6 |
7 | # temporary files which can be created if a process still has a handle open of a deleted file
8 | .fuse_hidden*
9 |
10 | # KDE directory preferences
11 | .directory
12 |
13 | # Linux trash folder which might appear on any partition or disk
14 | .Trash-*
15 |
16 | # .nfs files are created when an open file is removed but is still being accessed
17 | .nfs*
18 |
19 | ### OSX ###
20 | *.DS_Store
21 | .AppleDouble
22 | .LSOverride
23 |
24 | # Icon must end with two \r
25 | Icon
26 |
27 | # Thumbnails
28 | ._*
29 |
30 | # Files that might appear in the root of a volume
31 | .DocumentRevisions-V100
32 | .fseventsd
33 | .Spotlight-V100
34 | .TemporaryItems
35 | .Trashes
36 | .VolumeIcon.icns
37 | .com.apple.timemachine.donotpresent
38 |
39 | # Directories potentially created on remote AFP share
40 | .AppleDB
41 | .AppleDesktop
42 | Network Trash Folder
43 | Temporary Items
44 | .apdisk
45 |
46 | ### Python ###
47 | # Byte-compiled / optimized / DLL files
48 | __pycache__/
49 | *.py[cod]
50 | *$py.class
51 |
52 | # C extensions
53 | *.so
54 |
55 | # Distribution / packaging
56 | .Python
57 | build/
58 | develop-eggs/
59 | dist/
60 | downloads/
61 | eggs/
62 | .eggs/
63 | lib/
64 | lib64/
65 | parts/
66 | sdist/
67 | var/
68 | wheels/
69 | *.egg-info/
70 | .installed.cfg
71 | *.egg
72 |
73 | # PyInstaller
74 | # Usually these files are written by a python script from a template
75 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
76 | *.manifest
77 | *.spec
78 |
79 | # Installer logs
80 | pip-log.txt
81 | pip-delete-this-directory.txt
82 |
83 | # Unit test / coverage reports
84 | htmlcov/
85 | .tox/
86 | .coverage
87 | .coverage.*
88 | .cache
89 | nosetests.xml
90 | coverage.xml
91 | *.cover
92 | .hypothesis/
93 |
94 | # Translations
95 | *.mo
96 | *.pot
97 |
98 | # Django stuff:
99 | *.log
100 | local_settings.py
101 |
102 | # Flask stuff:
103 | instance/
104 | .webassets-cache
105 |
106 | # Scrapy stuff:
107 | .scrapy
108 |
109 | # Sphinx documentation
110 | docs/_build/
111 |
112 | # PyBuilder
113 | target/
114 |
115 | # Jupyter Notebook
116 | .ipynb_checkpoints
117 |
118 | # pyenv
119 | .python-version
120 |
121 | # celery beat schedule file
122 | celerybeat-schedule
123 |
124 | # SageMath parsed files
125 | *.sage.py
126 |
127 | # Environments
128 | .env
129 | .venv
130 | env/
131 | venv/
132 | ENV/
133 | env.bak/
134 | venv.bak/
135 |
136 | # Spyder project settings
137 | .spyderproject
138 | .spyproject
139 |
140 | # Rope project settings
141 | .ropeproject
142 |
143 | # mkdocs documentation
144 | /site
145 |
146 | # mypy
147 | .mypy_cache/
148 |
149 | ### Vim ###
150 | # swap
151 | [._]*.s[a-v][a-z]
152 | [._]*.sw[a-p]
153 | [._]s[a-v][a-z]
154 | [._]sw[a-p]
155 | # session
156 | Session.vim
157 | # temporary
158 | .netrwhist
159 | # auto-generated tag files
160 | tags
161 |
162 | # End of https://www.gitignore.io/api/vim,osx,linux,python
163 |
164 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Jseg
2 | ## A modified version of Jieba
3 |
4 | All credit goes to [fxsjy/jieba](https://github.com/fxsjy/jieba).
5 | Find more on: https://github.com/fxsjy/jieba
6 |
7 |
8 | ### Synopsis
9 |
10 | 1. **Equipped with Emoticon detection**
11 | Emoticons are kept intact rather than being segmented into sequences of meaningless punctuation marks.
12 |
13 | 2. **Trained on the Sinica Corpus**
14 | Results are more accurate when dealing with Traditional Chinese (F1-score = 0.91).
15 |
16 | 3. **Using Brill Tagger**
17 | The tagger is trained on the Sinica Treebank, which improves the accuracy of POS tagging.
18 |
19 | ### Environment
20 | + Python 2.7+
21 | + Python 3.3+
22 |
23 |
24 | ### Installation
25 |
26 | pip install -U jseg
27 |
28 |
29 | ### Usage
30 |
31 | from jseg import Jieba
32 | j = Jieba()
33 |
34 | ### Add user defined dictionary
35 |
36 | j.add_guaranteed_wordlist(lst)
37 |
38 |
39 | Here's a sample text:
40 |
41 |
42 | sample = '期末要爆炸啦! ◢▆▅▄▃崩╰(〒皿〒)╯潰▃▄▅▇◣'
43 |
44 | Segmentation *with* POS (part-of-speech)
45 |
46 | j.seg(sample, pos=True)
47 |
--------------------------------------------------------------------------------
/jseg/Emodet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amigcamel/Jseg/1b8f6aa5e65cfa770cb7e81df0a4fbc15a7bcd35/jseg/Emodet/__init__.py
--------------------------------------------------------------------------------
/jseg/Emodet/emo2.json:
--------------------------------------------------------------------------------
1 | ["(\u0c25\u0e3a\u02c7\u0c6a\u02c7\u0c25)", "( \u00b4\u0c25,_\u2025\u0c25\uff40)b", "( \u00b4\u0c25\u0c6a\u0c25\uff09\u03c3", "(\u11c2\u0434\u11c2 )", "(\u229c\u203f\u229c\u2734\uff09", "( \u0384\u25de\u0e34 .\u032b.\u032b \u25df\u0e34\u2035)y", "(;\u0326\u0326\u0326\u0326\u0326\u0326\u0326\u0326\u2182\u20dd\u20d3\u20d9\u20da\u20d8\u0434\u2182\u20dd\u20d3\u20d9\u20da\u20d8;\u0326\u0326\u0326\u0326\u0326\u200b\u0326\u0326\u0326)", "\u2037\u0317\u2182\u51f8\u2182\u2034\u0316", "( \u0c20\u0d60\u0c20 )\uff89", "\u10f0\u10f0\u10f0\u275b\u203f\u275b\u10f4\u10f4\u10f4", "(\u0e51\u0c25\u0e34\u76ca\u0c25\u0e34)", "(\uff0c_\u309d\u06de)", "\u3020_\u3020", "\u2022\u271e_\u271e\u2022", "\u2299\u2583\u2299", "\u0ca4\u0c8e\u0ca4", "\u0c20_\u0c20", "\u0cb8_\u0cb8", "\u2181_\u2181", "\u21ce_\u21ce", "\ud76b_\ud76b", "\u0ca0_\u0ca0", "(\uff65\u0e34\u03c9\uff65\u0e34)", "(\u0ca5_\u0ca5)", "\u25c9\u0e37\u76ca\u25c9\u0e37", "\u0ca0\u76ca\u0ca0", "(\u00b4=\u0e34\u76ca=\u0e34\uff40)", "(\u00b4^\u0e34\u76ca^\u0e34\uff40 )", "(\u25de\u227c\u25c9\u0e37\u227d\u25df;\u76ca;\u25de\u227c\u25c9\u0e37 \u227d\u25df)", "\u261c(:\u265b\u0e3a;\u76ca;\u265b\u0e3a;)\u261e", "(\u256c\u24ea\u76ca\u24ea)", "(\u256c\uff9f\u25e5\u76ca\u25e4\uff9f)", "( \u256c\u25e3 \u76ca\u25e2)y", "(\u256c\u0ca0\u0e34\u76ca\u0ca0\u0e34)", "(\u0e51\u2256\u0e34\u0e34\u76ca\u2256\u0e34\u0e51)", "(\u256c\u265b. \u76ca\u265b. 
)", "(\u256c\u0ca0\u76ca\u0ca0)\uff7a\uff9e\uff99\u30a1!!", "\u723b((\u256c\u0ca0\u0e34\u76ca\u0ca0\u0e34))\uff72\uff76\uff98\uff7d\uff9e\uff91\uff6f!!", "\u7676(\u7676;:\u309c;\u76ca;\u309c;)\u7676", "(\u0e51\uff65\u0e34\u22d6\u22d7\uff65\u0e34\u0e51)", "(\u0301\u25c9\u25de\u0c6a\u25df\u25c9\u2035)", "( ^\u0e34\u0c6a^\u0e34)", "(\u25de\u227c\u25cf\u227d\u25df\u25de\u0c6a\u25df\u25de\u227c\u25cf\u227d\u25df)", "\uff08 \u00b4\u2623///_\u309d///\u2623\uff40\uff09", "(\u0301\u25d5///\u25de\u0c6a\u25df///\u25d5\u2035)/", "\u30fd(\u0301\u25d5\u25de\u0c6a\u25df\u25d5\u2035)\uff89", "(\u0301=\u25de\u0c6a\u25df=\u2035)", "(*\u2609\u0c6a\u2299*)", "(\u0301^\u25de\u0c6a\u25df<\u2035)", "\uff08\u00b4\u225d\u25de\u2296\u25df\u225d\uff40)", "\u4e41( \u25d4 \u0c6a\u25d4)\u300c", "(\u0301\u2256\u25de\u0c6a\u25df\u2256\u2035)/", "(\u0301>\u25de\u0c6a\u25df<\u2035)\uff89\uff7c", "(\u0301\u261c\u25de\u0c6a\u25df\u261e`)/", "(\u0301\u3002\u25de\u0c6a\u25df\u3002\u2035)", "(\u0301\u3064\u25de\u0c6a\u25df\u2282\u2035)", "(\u0301\u4e00\u25de\u0c6a\u25df\u4e00\u2035)", "(V)(\u0301\u25c9\u25de\u0c6a\u25df\u25c9\u2035)(V)", "(\u0301\u0b87\u25de\u0c6a\u25df\u0b87\u2035)", "(\u0384\u0ca2\u25de\u0c6a\u25df\u0ca2\u2035)\u25c9\u25de\u0c6a\u25df\u25c9)", "(\u0384\u25c9\u25de\u0c6a\u25df\u25c9\u2035)", "(\u0301\u25c9\u25de\u0c6a\u25c9*\u25c9\u25c9\u25de\u25d5\u0c6a\u0c6a\u25df\u25d5\u25c9\u25c9\u03c6\u25c9\u0c6a\u25de\u25c9*\u2035)", "(\u0301\u3020\u25de\u0c6a\u25df\u3020\u2035)", "(\u0301\u25a3\u25de\u0c6a\u25df\u25a3\u2035)", "(\u0301\u273a\u25de\u0c6a\u25df\u273a\u2035)", "(\u0301\u0c25\u25de..\u25df\u0c25\u2035)", "(\u0301\u2299\u25de\u0c6a\u25df\u2299\u2035)", "(\u0301\u2738\u25de\u2649\u25df\u2738\u2035)", "(\u0301\u06de\u25de\u0c6a\u25df\u06de\u2035)", "(\u0301\u0ca4\u25de\u0c6a\u25df\u0ca4\u2035)", "(\u0301\u0b87\u25de\u0c6a\u25df\u0b87\u2035)", "(\u0301\u58a8\u25de\u0c6a\u25df\u93e1\u2035)", "\u0295\u2022\u032b\u0361\u2022\u0294", 
"\u10da(\u0301\u25c9\u25de\u0c6a\u25df\u25c9\u2035\u10da)", "\uff08\u10da ^\u0e34\u0c6a^\u0e34\uff09\u10da", "\u10da(\u0301\u0ca2.\u25de\u0c6a\u25df\u0ca2\u2035\u10da)", "\u10da\u2182\u203f\u203f\u2182\u10da", "\uff08\u10da ^\u0e34\u0c6a^\u0e34\uff09\u10da", "\u10da(\u2579\u25e1\u2579\u10da)", "\u10da(\u2579\u03b5\u2579\u10da)", "\uff08\u10da ^\u0e34\u0c6a^\u0e34\uff09\u10da", "\u10da(\uff9f\u0434\uff9f\u10da)", "\u10da\uff08\u0b87 e \u0b87\uff40\u3002\u10da\uff09", "\u10da\uff08\u00b4\u2200`\u10da\uff09", "\u10da(\u2070\u2296\u2070\u10da)", "\u10da(\uff65 \u0e34\u03c9\uff65\u0e34\u10da)", "\u10da(^o^\u10da)", "\u10da(\u30fb\u2200\u30fb )\u10da", "\u10da(\uff40\u2200\u00b4\u10da)", "\u10da(\uff3e\u03c9\uff3e\u10da)", "\u10da(\u275b\u25e1\u275b\u273f)\u10da", "\u2501( \u0e34\u25d5\u3268\u25d5 \u0e34)\u2501", "( \u0e34\u25d5\u3268\u25d5 \u0e34)", "(\uff65\u0e34\u3268\uff65\u0e34)", "( \u0e34\u0398\u3268\u0398)\u0e34", "(\u00d2\u3268\u00d3\u05f1)", "(\u00b4\u25c9\u3268\u25c9)", "(\u25d5\u072b\u25d5)", "(\u00d2\u072b\u00d3\u05f1)", "(\u272a\u03c9\u272a)", "(\u273a\u03c9\u273a)", "(\u00d2\u03c9\u00d3\u05f1)", "(\u0301\u25d5\u25de\u3180\u25df\u25d5`)", "(;\u00b4\u0f0e\u0eb6\u0414\u0f0e\u0eb6`)", "\u262e\u2581\u2582\u2583\u2584\u263e \u265b \u25e1 \u265b \u263d\u2584\u2583\u2582\u2581\u262e", "\u25e2\u2586\u2585\u2584\u2583\u5d29\u2570(\u3012\u76bf\u3012)\u256f\u6f70\u2583\u2584\u2585\u2587\u25e3", "\u03a3\uff08\uff9f\u0434\uff9flll\uff09", "(*\u02d8\ufe36\u02d8*).\uff61.:*\u2661", "\u256e(\u256f3\u2570)\u256d\u3000\u256e(\u256f_\u2570)\u256d\u3000\u256e(\u256f\u25bd\u2570)\u256d \u3000\u256e(\uffe3\u25bd\uffe3)\u256d", "(\u256f\u25d5\u25de\u0c6a\u25df\u25d5\u2035)\u256f\ufe35 \u2534\u2500\u2534", "\uff08\u256f\uff0d\uff3f\uff0d\uff09\u256f\u2567\u2567", "\uff08\u256f\u2035\u25a1\u2032\uff09\u256f\ufe35\u2534\u2500\u2534", "\u263b", "\uff08\u30fb\u2200\u30fb\uff09", "(\u309d\u2200\uff65)", "\uff08\uff3e\u03c9\uff3e\uff09", "(o\uff9f\u03c9\uff9fo)", 
"(*\uff3e\u30fc\uff3e)", "(\u2267\u2200\u2266)", "( \u00b4\u2200`)", "(\u2312o\u2312)", "(^-^*)", "\u30fd( ^\u2200^)\uff89", "(\uff9f\u2200\uff9f )", "(\uff89\u2200\uff40 )", "(\uff40\u2200\u00b4)", "(\uffe3\u30fc\uffe3)", "\u30fd( \u00b4\u30fc)\u30ce", "(-\u03b9_- )", "(\u00b0c\uff3f,\u00b0`)", "\u2727*\uff61\u0669(\u02ca\u15dc\u02cb*)\u0648\u2727*\uff61", "(#\uff9f\u0414\uff9f)", "(\uff40_\u309d\u00b4)", "(`\u76bf\u00b4)", "(\uff89\uff40\u22bf\u00b4)\uff89", "\u30fd(\uff40\u0434\u00b4)\uff89", "( \u00b4\u2022\u0325\u0325\u0325\u03c9\u2022\u0325\u0325\u0325` )", "(;_;)", "(\uff1b\u0434\uff1b)", "(\uff89\u0414`)", "(\u3064\u0414`)", "\uff61\uff9f\uff65(\uff89\u0414`)\u4eba(\u00b4\u0414`)\u4eba(\u0414` )\uff65\uff9f\uff61", "(\u30ce\u0414\uff34)", "(\uff70\uff40 )", "(/\u0414`)~\uff9f\uff61", "\u03a3(\u00b0\u0414\u00b0;", "(\u00b0\u0414\u00b0)", "\u2211(\uffe3\u25a1\uffe3;)", "\u03a3(\u30fb\u03c9\u30fb\u30ce)\u30ce", "(\uff9f\uff9b \uff9f\uff89)\uff89", "\u03a3\u2517(\uff20\u30ed\uff20;)\u251b", "\u03a3\u30fd(\uff9f\u0414 \uff9f; )\uff89", "\u30fe(>\u0414<;)))).....", "(((\u250f(;\uffe3\u25bd\uffe3)\u251b", "\u30fe(\u3002\uffe3\u25a1\uffe3)\u30c4", "\u2514(\uffe3^\uffe3 )\u2510", "(\u309co\u309c(\u2606\u25cb=(-_- )", "(\uff4f\uffe3\u2207\uffe3)\uff1d\u25ef)`\u03bd\u309c)\uff65\uff1b'", "( \u00b4\u2200\uff40)\u03c3", "(\u03c3\u00b4\u25a1\uff40)\u03c3", "( \uff9f\u0414\uff9f)\u03c3", "(\u03c3\uff9f\u2200\uff9f)\u03c3\uff9f\u2200\uff9f)\u03c3\uff9f\u2200\uff9f)\u03c3", "\uffe3\ufe43\uffe3", "( \uff40\u76bf\u00b4)\uff89\"\u9e75=\u5ddb\u5ddb\u5ddb\u5ddb.:;:", "( \u30fbO\u30fb)\u30ce\u2312\u25cf", "( \uff9f\u0414\uff9f)\uff89\u2312\u2464", "(\uff65\u2200\uff65)\u3063/\u51f5", "(o`\uff65\u03c9\uff65)\uff89(\uff89\u0414`)", "(\u00b4\u30fc\uff40)y\u2500\u251b", "\u30fe(*\u03a6\u03c9\u03a6)\u30c4", "(\u00b4\u2200\uff40\u2229)\u2191\u2191", "(\uff9f\u0414\uff9f\u2261\uff9f\u0414\uff9f)?", "( \uff9f\u00c5\uff9f)", "\u2044(\u2044 \u2044\u2022\u2044\u03c9\u2044\u2022\u2044 
\u2044)\u2044", "(\u2256\u203f\u309d\u2256)\u2727", "\u51f8(= _=)\u51f8", "( \u0361\u00b0 \u035c\u0296 \u0361\u00b0)", "\u2727(\uc21c_\uc21c)\"", "(\u256f\u1561\u1f64\u1561)\u256f", "\u2db8(\u159b \u0394 \u159c)\u2db8", "\u10da(\u2022\u0300 _ \u2022\u0301 \u10da)", "\u25d4\"L__\u25d4"]
--------------------------------------------------------------------------------
/jseg/Emodet/emo3.json:
--------------------------------------------------------------------------------
1 | ["(\uffe3\u25bd\uffe3)", "<(\uffe3\ufe36\uffe3)>", "\u2570(\uffe3\u25bd\uffe3)\u256f", "(\uffe3\ufe4f\uffe3)", "\u256e(\u256f\u25bd\u2570\")\u256d\u2606", "\u2570(\uffe3\ufe36\uffe3)\u256f", "(*\u03c9*)", "(#\uffe3\u25bd\uffe3)o\u256d\u256f \u2606\u2570\u256eo(\uffe3\u25bd\uffe3///)<", "\ufe3f(@\uffe3\ufe36\uffe3@)\ufe3f", "o(\u2035\u25bd\u2032)o", "(\u25cb^\uff5e^\u25cb)", "^(\uff0a)^", "\uff08*^\ufe4f^*\uff09", "(*^\uff20^*)", "\uff5e(\u256f\u25bd\u2570)\uff5e", "\uff5e(\uffe30\uffe3)\uff5e", "~(\uffe3\u25bd\uffe3)~", "(\ufe4b\ufe4f\ufe4c)", "\\(^\u03c9^)/", "\\(\u2267\u03c9\u2266)/", "\u2267\u25bd\u2266", "<\u03a6\ufe4f\u03a6>", ">(\u02d9\u03c9\u02d9)<", "(=\u02d9~\u02d9=)", "(\u03c3 \u2500.\u2500)\u03c3", "q(\uffe3\u03b5\uffe3 )p", "<(\uffe3><\uffe3)>", "(\u2032\u25a1\u2035)\u256d\uff5e", "(\u2032\uff31\u2035)\u256d\uff5e", "\\(\u256f\ufe4f\u2570)", "\u02cb(\u2032\uff5e\u2035\")\u02ca", "\u02cb( \u00b0 \u25bd\ufe51\u00b0|||| )", "^(\u02d9oo\u02d9)^", "^(\uffe3(oo)\uffe3)^", "=(\u2500(oo)\u2500)=", "\u222a(\uffe3\ufe41\uffe3)\u222a", "q(\u2032\u25a1\u2035)p", "(\uffe3y\u25bd\uffe3)\u256d", "\u03b5=\u03b5=\u03b5=\u03b5=\u03b5=\u03b5=\u256d(\uffe3\u25bd\uffe3)\u256f", "\uff5e(\uffe3\u25bd\uffe3)\uff5e(\uff3f\u25b3\uff3f)\uff5e(\uffe3\u25bd\uffe3)\uff5e(\uff3f\u25b3\uff3f)\uff5e(\uffe3\u25bd\uffe3)\uff5e", "( \u2032-`)y-\uff5e", "( \uffe3 c\uffe3)y\u2582\u03be", "(\u2032\u03c9\u2035)y-\uff5e", "\uff08\uffe3(\u221e)\uffe3\uff09y-\uff5e", "(*-*)y~", "<(\u2035\ufe35\u2032)>", "<(\u2035\u25a1\u2032)>", "\u2570(\u2035\u25a1\u2032)\u256f", "<(\u2035\u25b3\u2032)>", "\u2570(\u2035\u25b3\u2032)\u256f", "\u256e(\uffe3\u25a1\uffe3)\u256d", "(#\u2035\u25a1\u2032)\u256f", "\u2570(\u2035\u25a1\u2032#)", "\u51f8(\u2594\ufe35\u2594#)", "(\u256f \u2035\u25a1\u2032)\u256f", "\u51f8(\uffe3\u76bf\uffe3*)", "\u25cb(#\u2035\ufe3f\u2032\u3128)\u25cb", "(\u2035\u25a1\u2032)--\u25cf(#)\uffe3(00)\uffe3)", "( *\u2035\u0414\u2032)--\u25cf(#)\u2035\u22bf\u2032)\u256e", 
"(#\u2594\ufe35\u2594)=====\u25cf(\uffe3 \u22bf\"\uffe3)\u256d", "(#\u2035\u25bd\u2032)\uff0f~~~~~~~~~(\u2606___\u2606)", "(\u25cb\u2035\u25b3\u2032)=\u25cb\u25cb\u25cb\u25cb(\uffe3#)3\uffe3)", "(o \u2500_\u2500)==O)\u3002O\u3002)", "(#\u2594\u76bf\u2594)\u2500<(*\u2014_____\u2013)", "(*\u2500\ufe35\u2500)====Co( \u4e00//////\u4e00)o", "(#\u256f\uffe3\u25a1\uffe3)\u256f~\u2534\u2500\u2534", "(\u256f \u2035\u0414\u2032)\u256f\u2567\u2500\u2500\u2500\u2567", "\u2569\u2550\u2550\u2569 \uff5e \u2570(\uffe3\u25b3\uffe3\u2570)", "\u2534\u2534~\u2570(\u2035\u25a1\u2032#)", "(\u256f#\u2594__\u2594)\u256f\u2534\u2534\\(\"\uffe3\u25a1\uffe3)", "(\u256f\u2267\u25bd\u2266)\u256f ~\u2569\u2550\u2550\u2550\u2569", "(\u3012__\u3012)", "(\u3012\u25b3\u3012)", "(\u3012\u25c7\u3012)o\uff5e", "(\u3012\ufe35\u3012)", "(\u3012\ufe4f\u3012)", "q(\u3012\u25a1\u3012)p", "\u256e(\u3012\ufe4f\u3012)\u256d", "\uff08 \u2594\ufe3f\u2594\uff09\u256d\uff5e", "(\u03a0\u25b3\u03a0)", "\u03c3 >\ufe3f<\u03bf", "\u03c3(\u2025)", "(\u03a0___\u03a0)", "\u256e(\u256f_\u2570)\u256d", "\u256e\uff08\u256f\ufe4f\u2570\uff09\u256d", "\u256e\uff08\u256f\u2574\u2570\u2225\uff09\u256d", "\u256e(\u256f\uff5e\u2570\" )\u256d", "\u256e(\u2032\u03c9\u2035)\u256d", "\u256e(\ufe40\u03c9\ufe40\")\u256d", "\u256e(\u256f\u25bd\u2570)\u256d", "\u256e(\u2594\u25bd\u2594\")\u256d", "\u256e(\ufe4b\ufe4f\ufe4c)\u256d", "\u256e(\u2594\u03b5\u2594)\u256d", "/(\u256f\u25bd\u2570\")\\", "\u2196\u256d\u2570('(\u03a9)')\u256f", "\u02cb(\u2032\uff5e\u2035\")\u02ca", "\ufe3f(\u2032\u25a1\u2035)\ufe3f", "\u256e(\uffe3(oo)\uffe3)\u256d", "o(.\u301d.)o", "o(\u2027\"\"\u2027)o", "o(\u2027/////\u2027)o", "o(*/////*)o", "o(\uff20//////\uff20)o", "o(\u2299/////\u2299)o", "o(\u2500//////\u2500)o", "\\(\u2299O\u2299)/", "\\\u2299\u25bd\u2299/", "/(\u2032\u02d9\u2035)\\", "/(\u2032\u25a1\u2035)\\", "(\u00af \u25a1 \u00af)", "\\(\"\uffe3\u25a1\uffe3)/", "/(*\uffe3\u25b3\uffe3)\\", "(((m -_-)m", "( \u00af\ufe41\u00af )", "(-_____-x)", 
"(\uffe3\u25a1\uffe3||)", "\ufe4f\ufe4f(\u310f\uffe3\u25bd\uffe3)\u310f", "\\(\u2032\u25a1\u2035)/", "\u311f(\uffe3\u25bd\uffe3\u311f)\ufe4f\ufe4f", "(#\uffe3\u25bd\uffe3)o\u256d\u256f \u2605", "\\(\uffe3\u25a1\uffe3)/", "\u311f(\uffe3\u25bd\uffe3\u311f)\ufe4f", "\\(\uffe3\u25a1\uffe3*)/", "\u311f(\uffe3\u25bd\uffe3\u311f)\ufe4f\ufe4f", "\ufe4f\ufe4f(\u310f\uffe3\u25b3\uffe3)\u310f", "\u2570(\u3012\u25cb\u3012)\u256f", "\ufe4f\ufe4f(\u310f\uffe3\u25bd\uffe3)\u310f (\u4e00(oo)\u4e00)\uff0a \u311f(\uffe3\u25bd\uffe3\u311f)\ufe4f", "\u256d(\u256d(\u256d (\u310f\uffe3\u25b3\uffe3)\u310f", "\\(\"\uffe3\u25bd\uffe3)/", "\u256d(\u256d(\u256d\uff08\u310f\uffe3\u25a1\uffe3\uff09\u310f", "(\u4e00(oo)\u4e00)\uff0a", "(\u3003^\u25c7^\u3003)", "(*^ \u25c7 ^*)", "(@^\u25c7^@)", "(\u256f^\u25c7^)\u256f", "( \u2267\u25c7\u2266 )", "(\uff34\u25c7\uff34)", "(=\uffe3\u25c7\uffe3=)\uff36", "\\(\uffe3\u25c7\uffe3)/", "(*\uffe3\u25c7\uffe3)\u256f", "\u256e(\uffe3\u25c7\uffe3)\u256d", "(\u3128\u2035\u25c7\u2032)", "(\uff20\u25c7\uff20)", "(*\u2035\u25c7\u2032*)", "(\u3002\u25c7\u3002||)", "\\(\u03a0\u25c7\u03a0)", "(\uffe3\u25c7\uffe3)\u51f8", "\u03a8(\u2267\u25c7\u2266)\u03a8", "((((<||\uff3f\u25c7\uff3f>", "|||||\u256d(\uff3f\u25c7\uff3f)\u256e|||||", "(-\u25ce\u25c7\u25ce-)o", "-{}@{}@{}-\u03c5(\uffe3\u25c7\uffe3*)", "(\uffe3\u2554\u25c7\u2557\uffe3)", "\u5ddd\u2027\u25c7\u2027\u5ddd", "\u5dde\u2027\u25c7\u2027\u5dde", "| \u67f1 |\u2510(\uffe3\u25c7\uffe3*)", "\u2514|\u2027\u25c7\u2027|\u2510", "\u2225\u2225<\u2225\u2032 \u25c7\u2035 || >\u2225\u2225", "|\u03a6|<|\u2032|\u25c7|\u2035|>|\u03a6|", "\u2560\u2566\u2569\u2566\u2566\u2569\u2563\u25c7\u2027)", "\u252c\u2534\u252c\u2534\u2524(\u2027\u25c7\u251c\u252c\u2534\u252c", "\uff3c(\uffe3\u3000\u3000\u3000\u25c7,\u3000\u3000\u3000\uffe3)\uff0f", "\u250c|\u2267\u25c7\u2266|\u2518\u505a\u2514|\u2267\u25c7\u2266|\u2510\u9ad4\u250c|\u2267\u25c7\u2266|\u2518\u64cd", "|||\u256d(\uff3f\u25c7\uff3f)\u256e|||", 
"(\u3128\u2035\u25c7\u2032)\u51f8", "(\u3112\u25c7\u3112)o\uff5e", "\u5ddd\u2027\u25c7\u2027\u5ddd", "-@{}@{}-\u03c5(\uffe3\u25bd\uffe3*)"]
--------------------------------------------------------------------------------
/jseg/Emodet/emo4.json:
--------------------------------------------------------------------------------
1 | ["((((((\u25cb\u309c\u03b5\u309c\u25cb)\u30ce~~", "~~~\u30fe(\u25cb\u309c\u25bd\u309c\u25cb)", "(\uff20\u309c\u25bd\u309c@)\u30ce~~~", "((\u25cf\u309cc_\u309c\u25cf))", "\u30fe(\u25cf\u00b4\u25bd\uff40\u25cf)\u30ce", "(\uff4f\u00b4\u8278\uff40\uff4f)", "\u30fe(\uff20\u309c\u25bd\u309c@)\u30ce", "...\"8-( \u25cf\uff40\u03b5\u00b4\u25cf)\u723b(\u25cf\uff40\u03b5\u00b4\u25cf )-8\"", "(\u2032\u222a`\u8278)\u2312*\u3002", ".\u3002.(\u3002\u00b4\u30fb\u03c9\u30fb\u3002)\u30ce\u309b .\u2661\u3002:", "(\u30ce\u03b5\u309c\u25cf)", "(\u2022\uff56'\u221e\u4eba)", "\u3000\u3000", "\uff08d\u2022\u309cv`\u25cf)+\u309c\u221e", "(\u30ce)'\u0434\uff40(\u30fe)", "\u30fd\u2022\u00b4\u0437`\u2022\u30ce\u266c", "(\u25cf'3`)\u30ce\u2312\u2665", "\u30fe(\u2665'3`\u2660))\u30ce", "\u2191\u2191.+\u309c*\u30fd(\u32a4\u00b4\u2200`*\u32a4)\u30ce\u3002:\u309c+", "(\u309d\u03c9\u30fb)", "\uff08\u3003\u30fb\u222a\u30fb\uff40\uff09", "(**\u25d1\u2200\u25d0)", "(o\u2661\u212b\u2661o)", "(\u30fbc_,\u30fb$)", "(\u3003\u00b4\u2022\u222a\u2022\uff40)", "d(\u309d\uff43_,\u30fb*\uff09b", "(\u25cf*\u00b4\u0414`*)\u30ce??", "(,,\u309cc_,\u309c*)\u30ce", "(\u25cf*>_,\uff1cp\u559cq)*", "\u30fe(o\u309c_,\u309co)\u30ce", "*:.\u3002.\u3002.:*\u30fe(\u25cf\u00b4\u30fb\u03c9\u30fb)(\u30fb\u03c9\u30fb`\u25cb)\u30ce*:.\u3002.\u3002.:*", "\u30ce(\u2606\u00b4\u03b5\uff40\u2261)\u30ce", "\u3001(\u30fbc_,\u30fb\u3002)", "(*\u00b4\u03c9\uff40)\u3063", "(\u30ce\u30fb\u03c9\u30fb)\u30ce\u30fe(\u30fb\u03c9\u30fb\u30fe)", "(\u03c3\u30fb\u0414\u30fb)\u03c3\u2605", "(\uff40\u30fb\u03c9\u00b4\u30fb+)", "(*\u309d\u03c9\u30fb*)\u30ce", "\u30fe(\u25cf\u00b4\uff35\uff40\u25cf)\u30ce", "\u2661\u30fd(\u25cf\u00b4\u03b5\uff40\u25cf)\u30ce\u2661", "\u30fd(\u00b4\u2200\uff40\u25cf)\u30ce", "\u30fe(*\u00b4\u2200\uff40*)\u30ce", "\u309c\u30fb*+:\u309c((((\u00b4*\u30fb\u0434\u30fb*`))))\u309c\u30fb*+:\u3002", "\u3002\uff4f\u25cbd(-\u03c9\u30fb\u3002)", "(\u2605'A`!)", "(\u266f'`\u2605!) 
", "\uff08\u3064'A`\u309c\uff09\u309c+\u309c", "\u30fb:*:\u30fb\u30a3\u30a3!!(*\u00b4\u25bc\uff40*\u4eba)\u30fb:*:\u30fb", "(*'v`*)\u309c+ ", "\u30fe(`\u30fb\u03c9\u30fb\u00b4) ", "(\uff42'`*\uff44))", "(\u25cb'\u8278`\u25ce)", "\u25bc*\u309cv\u309c*\u25bc", "\u2514(\u309c\u2200\u309c\u2514)(\u2518\u309c\u2200\u309c)\u2518", "......\u250c\u2524\u00b4\u0434`\u251c\u2510", "(\u309c\u2200\u309c\u30ce\u30ce\u309b\u2606' ", "\u30fd( \u30fb\u2200\u30fb)\u30ce\u250c\u251b\u03a3(\u30ce `\u0414\u00b4)\u30ce", "o\uff1c\uff0d\u2200\uff0d\uff1eo", "(q\uff400\u00b4p)", "\u03a3 \u309c\u30ed\u309c\u2261( \u3000 \u30ce)\u30ce ", "\u3002+\u309c\u2605(\u4eba'v\uff40*)\u2606\u309c+\u3002", "\u3002+\u309c\u30fe(o\u309c\u2200\u309co)\u30ce\u3002+\u309c ", "\u2606.\u3002.:*(\u5b09\u00b4\u0414\uff40\u5b09).\u3002.:*\u2606", "\u30fd(\u25cf\u309d\u2200\u30fb\u25cf)\u30ce\u30fb.\u3002*", "\u30fe@(o\u30fb\u30a7\u30fbo)@\u30d0\u30a4\u30d0\u30a4\u266a", "~~~\u30fe(\u25cb\u309c\u25bd\u309c\u25cb) ", "((((((\u25cb\u309c\u03b5\u309c\u25cb)\u30ce~~", "\u30fb\u309c\u30fb\u30fb:*:\u30fb\u30fb\u309c\u30fb(\u25cf\u00b4\u2200\uff40 \u25cf)\u30fb\u309c\u30fb\u30fb:*:\u30fb\u30fb\u309c\u30fb", "\u03b5=\u03b5=(\u25cf\u30fb\u2207\u30fb\u4eba\u25ce\u2267\u76bf\u2266 \uff09", "\u30fe(\u25cf\u00b4\u25bd\uff40\u25cf)\u30ce", "\u30fe(\uff20\u309c\u25bd\u309c@)\u30ce", "((\u25cf\u309cc_\u309c\u25cf))b", "(\u2605\u309d\u2200\u30fb\u2606)\u30ce", "(*\u00b4\u2200\uff40*)", "\u30fe(o\u309c\u30a7\u309co)\u30ce", "(\u00b4\u30fb\u03c9\u30fb`)(\u00b4\u30fb\u03c9:;.:..(\u00b4\u30fb:;...::;.:.:: ", "\u2605\u309c+.\u30fd(*\u3002>\u0414<)o\u309c+.\u309c\u2606", "(*\u2609\u0c6a\u2299*)", "\u30f5\u309b\u2500\u2501\u2500\u2501(*\u309c\u0414\u2609*)\u2500\u2501\u2500\u2501\u30f3!!", "\u03a3(\u2609A\u2299\uff4c|\uff4c)", "(\u25cf\u00b4\u2200\uff40\u25cf)\u30ce\u3002.:*\u30fb\u309c*:.\u3002.", "(\u25cf\u00b4\u30fb\u03c9\u30fb)\u30ce", 
"\u30fe(o\u00b4\u03c9\uff40o)\u2663\u2667\uff27\uff4f\uff27\uff4f\uff27\uff4f\uff01\u2667\u2663(o\u00b4\u03c9\uff40o)\u30b7", "(\uff40\u30fb\u03c9\u30fb)\u03c3\u03c3\u3010\u309c+.\u309c\u043d\u0451\u03b9\u03b9\u03bf\u309c+.\u309c\u3011(\u30fb\u03c9\u30fb\u00b4)\u30b7", "(\u25ce\u00b4\u8278\uff40))\u3010\u2666\u2666\u0397\u0451\u03b9\u03c3\u03c3\u2666\u2666\u3011((\u00b4\u8278\uff40\u25ce)", "\u30fe(\u25cf\u00b4\u212b`)\u3010\u266c\u043d\u0451\u03b9\u03b9\u03bf\u266c\u3011(\u00b4\u212b\uff40\u25cf)\u30b7", "(\u30fb\u03c9\u30fb\u3002)", "(\uff0e\uff0e\u3002)", "(\u3002\u30fb\u03c9\u30fb)", "\u30fd\uff08\u309c\u2200\u309c\uff09", "(\u25cf\u309c\uff56\uff40)\u30ce", "\u30fd(\u309c\uff56\uff40\u25cf)", ".+:\u3002(\u25cf\u309d\u03c9\u30fb)\uff5e\u2606NE\uff57!!\u309c.+:", "(\u3002\uff0e\uff0e) ", "\u30fe(\u2312\u2207\u2312*)See You\u266a", "(\u25cf\u2312\u2200\u2312\u25cf)", "\u30fe(\uffe3\u2207\uffe3)Bye\u30fe(\uffe3\u2207\uffe3)Bye"]
--------------------------------------------------------------------------------
/jseg/Emodet/emoall.txt:
--------------------------------------------------------------------------------
1 | (メ゚Д゚)メ
2 | (〃^◇^〃)
3 | Zz(´-ω-`*)
4 | (꒪ͦᴗ̵̍꒪ͦ )
5 | (`へ´≠)
6 | (♡˙︶˙♡)
7 | |Φ|<|′|◇|‵|>|Φ|
8 | (( へ(へ´∀`)へ
9 | ╮(﹋﹏﹌)╭
10 | (ó㉨ò)
11 | (́^◞౪◟<‵)
12 | ( ´∀`);y=ー(゚д゚)・∵. ターン
13 | (*˘︶˘*).。.:*♡
14 | (-◎◇◎-)o
15 | ゚Å゚)
16 | 爻((╬ಠิ益ಠิ))イカリズムッ!!
17 | ヽ( ^∀^)ノ
18 | (́提◞౪◟供‵)
19 | ╮(′ω‵)╭
20 | -@{}@{}-υ( ̄▽ ̄*)
21 | (((o(*゚▽゚*)o)))
22 | ╮(╯∀╰)╭
23 | 。・゚・(つд`゚)・゚・
24 | (^_っ^)
25 | σ >︿<ο
26 | ( ◞≼☸≽◟ ._ゝ◞≼☸≽◟)
27 | ( 3ω3)
28 | (「・ω・)「
29 | ( *´◒`*)
30 | (^-^*)
31 | ( • ̀ω•́ )
32 | (́✺◞౪◟✺‵)
33 | d(`・∀・)b
34 | (*゚∀゚*)
35 | o(‵▽′)o
36 | ⶸ(ᖛ Δ ᖜ)ⶸ
37 | ( ´゚,_」゚)バカジャネーノ
38 | (。・㉨・。)
39 | ╠╦╩╦╦╩╣◇‧)
40 | ☻
41 | (´・ω・)つ旦
42 | ╮(▔▽▔")╭
43 | ┐(´д`)┌
44 | (゚д⊙)
45 | _(´ཀ`」 ∠)_
46 | /(* ̄△ ̄)\
47 | (╬☉д⊙)
48 | ヾ(。 ̄□ ̄)ツ
49 | Σ(;゚д゚)
50 | ( ̄(エ) ̄)
51 | ^(*)^
52 | (´・ω・`)
53 | (=˙~˙=)
54 | ⊙▃⊙
55 | 。゚ヽ(゚´Д`)ノ゚。
56 | ∑(ι´Дン)ノ
57 | ♡(*´∀`*)人(*´∀`*)♡
58 | <( ̄︶ ̄)>
59 | -`д´-
60 | (◞≼◉ื≽◟;益;◞≼◉ื ≽◟)
61 | (T◇T)
62 | (ゝ∀・)
63 | (#╯ ̄□ ̄)╯~┴─┴
64 | ε=ε=ε=ε=ε=ε=╭( ̄▽ ̄)╯
65 | (゜皿。)
66 | (〒︿〒)
67 | 凸(= _=)凸
68 | ლ(⁰⊖⁰ლ)
69 | (-_____-x)
70 | (ᇂдᇂ )
71 | (*≥▽≤)ツ┏━┓
72 | (́ತ◞౪◟ತ‵)
73 | ლ|^Д^ლ|
74 | (#‵□′)╯
75 | ︿(′□‵)︿
76 | ( ˘•ω•˘ )
77 | (́墨◞౪◟鏡‵)
78 | (శωశ)
79 | ┌|◎o◎|┘
80 | ・゜・(PД`q。)・゜・
81 | (╬゚д゚)
82 | ╮(╯╴╰∥)╭
83 | (눈‸눈)
84 | ◑ω◐
85 | =.=
86 | ☜(:♛ฺ;益;♛ฺ;)☞
87 | (*‵◇′*)
88 | (<ゝω・) 綺羅星☆
89 | (°Д°)
90 | ヾ(´︶`*)ノ♬
91 | ┏( .-. ┏ ) ┓
92 | (´c_`)
93 | ⊂彡☆))д`)
94 | ( ̄▽ ̄)
95 | (┛`д´)┛
96 | q(′□‵)p
97 | //( ◕‿‿◕ )\
98 | (๑・ิ⋖⋗・ิ๑)
99 | ლ(╹ε╹ლ)
100 | (o`・ω・)ノ(ノД`)
101 | ᕕ ( ᐛ ) ᕗ
102 | ჰჰჰ❛‿❛ჴჴჴ
103 | ρ(・ω・、)
104 | (。-_-。)
105 | (´・_・`)
106 | ლ(́◕◞౪◟◕‵ლ)
107 | = =
108 | (๑•́ ₃ •̀๑)
109 | \\\\٩( 'ω' )و ////
110 | ( ´゚Д゚`)
111 | ( ‘д‘⊂彡☆))Д´)
112 | (;_;)
113 | ლ(´•д• ̀ლ
114 | Σ(゚д゚)
115 | <*)) >>=<
116 | (•ㅂ•)/
117 | ヾ(*ΦωΦ)ツ
118 | ( ゚∀゚)つ≡≡≡♡♡♡)`ν゜)グシャ
119 | \(′□‵)/
120 | ε=ε=ヾ(;゚д゚)/
121 | _(┐「﹃゚。)_
122 | ㄟ( ̄▽ ̄ㄟ)﹏
123 | ~(╯▽╰)~
124 | o(☆Ф∇Ф☆)o
125 | (*´ω`)人(´ω`*)
126 | ┳━┳ノ( OωOノ)
127 | ~(~o ̄▽ ̄)~o.....o~(_△_o~)~..
128 | (゚⊿゚)
129 | (〒︵〒)
130 | (:◎)≡
131 | 。゚・(ノД`)人(´Д`)人(Д` )・゚。
132 | ┳━┳ノ( ' - 'ノ)
133 | (#゚Д゚)
134 | ლ(・∀・ )ლ
135 | (́≖◞౪◟≖‵)/
136 | (´~`)
137 | (¦3[▓▓]
138 | (╯≧▽≦)╯ ~╩═══╩
139 | (❛◡❛✿)
140 | 乁( ◔ ౪◔)「
141 | (ー` )
142 | ( ′-`)y-~
143 | (´-ω-`)
144 | (σ´□`)σ
145 | ᕦ(ò_óˇ)ᕤ
146 | ₍₍◝(・'ω'・)◟⁾⁾
147 | ◢▆▅▄▃ 溫╰(〞︶〝) ╯馨 ▃▄▅▆◣
148 | ( ºΔº )
149 | 흫_흫
150 | (°c_,°`)
151 | (╬♛. 益♛. )
152 | (́✸◞♉◟✸‵)
153 | (=´ᴥ`)
154 | ఠ_ఠ
155 | : ♡。゚.(*♡´◡` 人´◡` ♡*)゚♡ °・
156 | (╬ಠ益ಠ)ゴルァ!!
157 | ━(゚∀゚)━( ゚∀)━( ゚)━( )━( )━(゚)━(∀゚)━(゚∀゚)━
158 | (◔⊖◔)つ
159 | (,,゚Д゚)
160 | ⁄(⁄ ⁄•⁄ω⁄•⁄ ⁄)⁄
161 | ಸ_ಸ
162 | (^ω^)
163 | ლ(・´ェ`・ლ)
164 | (⁰▿⁰)
165 | (メ3[____]
166 | ﹏﹏(ㄏ ̄▽ ̄)ㄏ
167 | ლ(́ಢ.◞౪◟ಢ‵ლ)
168 | ヽ(゜▽゜ )-C<(/;◇;)/~[拖走]
169 | (´◓Д◔`)
170 | (≧∀≦)ゞ
171 | (´д((☆ミPia!⊂▼(o ‵-′o)▼つPia!彡★))∀`)
172 | ˋ(′~‵")ˊ
173 | (o ̄∇ ̄)=◯)`ν゜)・;'
174 | ✧◝(⁰▿⁰)◜✧
175 | <(_ _)>
176 | (。A。)
177 | (′Q‵)╭~
178 | (๑•̀ㅂ•́)و✧
179 | (☞゚∀゚)゚∀゚)☞
180 | (´≝◞⊖◟≝`)
181 | (●б人<●)
182 | ((└(:3」┌)┘))
183 | ( •́ _ •̀)?
184 | (́◕///◞౪◟///◕‵)/
185 | (๑¯∀¯๑)
186 | (◞‸◟)
187 | (* ̄◇ ̄)╯
188 | ヽ(㊤V㊤*)ノ
189 | \(" ̄□ ̄)/
190 | (′・_・`)
191 | Σ(°Д°;
192 | <( ̄>< ̄)>
193 | (ㄨ‵◇′)凸
194 | ◥(ฅº₩ºฅ)◤
195 | (‘⊙д-)
196 | ლ( • ̀ω•́ )っ
197 | ( ̄╔◇╗ ̄)
198 | ㄟ( ̄▽ ̄ㄟ)﹏﹏
199 | (́۞◞౪◟۞‵)
200 | (@◇@)
201 | ( ̄(∞) ̄)y-~
202 | (  ̄ c ̄)y▂ξ
203 | (ÒωÓױ)
204 | (@^◇^@)
205 | (」・ω・)」うー!(/・ω・)/にゃー!
206 | (﹋﹏﹌)
207 | (|||゚д゚)
208 | (Ò㉨Óױ)
209 | ( ̄﹏ ̄)
210 | (▰˘◡˘▰)
211 | (╯ᕡὤᕡ)╯
212 | \⊙▽⊙/
213 | ƪ(•̃͡ε•̃͡)∫
214 | ( ´∀`)σ
215 | (’へ’)
216 | (╯°Д°)╯ ┻━┻
217 | (((┏(; ̄▽ ̄)┛
218 | ԅ(¯﹃¯ԅ)
219 | (゚∀。)
220 | 州‧◇‧州
221 | ヽ(∀゚ )人(゚∀゚)人( ゚∀)人(∀゚ )人(゚∀゚)人( ゚∀)ノ
222 | ( ゚Д゚)σ
223 | ( ゚Д゚)ノ⌒⑤
224 | ヽ( ° ▽°)ノ
225 | /(′˙‵)\
226 | (●⁰౪⁰●)
227 | (❍ᴥ❍ʋ)
228 | ‷̗ↂ凸ↂ‴̖
229 | ╰( ̄▽ ̄)╯
230 | ..._〆(°▽°*)
231 | (゚∀゚ )
232 | ╮(〒﹏〒)╭
233 | ︿(@ ̄︶ ̄@)︿
234 | (΄ಢ◞౪◟ಢ‵)◉◞౪◟◉)
235 | (メ ゚皿゚)メ
236 | (*´з`*)
237 | ε≡(ノ´_ゝ`)ノ
238 | (〃∀〃)
239 | ~( ̄▽ ̄)~(_△_)~( ̄▽ ̄)~(_△_)~( ̄▽ ̄)~
240 | ಠ_ಠ
241 | (・ิω・ิ)
242 | ヽ(=^・ω・^=)丿
243 | (。í _ ì。)
244 | ˋ( ° ▽﹑°|||| )
245 | (́థ◞..◟థ‵)
246 | ( ´∀`)つt[ ]
247 | ( ̄ε(# ̄)☆
248 | (‵□′)--●(#) ̄(00) ̄)
249 | ( ´・◡・`)
250 | (:3[___]=
251 | (`_ゝ´)
252 | (。・ω・。)
253 | (´◔∀◔`)
254 | (ლ ^ิ౪^ิ)ლ
255 | ﹏﹏(ㄏ ̄▽ ̄)ㄏ (一(oo)一)* ㄟ( ̄▽ ̄ㄟ)﹏
256 | (•‾⌣‾•)
257 | ....ˊˋ------。:.゚_ヽ(_´∀`_)ノ_.:。((浮水
258 | ・*・:≡( ε:)
259 | 魔貫光殺砲(゚Д゚)σ━00000000000━●
260 | ╰(‵□′#)
261 | (= ̄◇ ̄=)V
262 | ( ´・・)ノ(._.`)
263 | (ノ∀` )
264 | ヽ(#`Д´)ノ
265 | (゚Д゚≡゚Д゚)?
266 | ┌|≧◇≦|┘做└|≧◇≦|┐體┌|≧◇≦|┘操
267 | (#▔︵▔)=====●( ̄ ⊿" ̄)╭
268 | (#‵▽′)/~~~~~~~~~(☆___☆)
269 | _ノ乙(、ン、)_
270 | (´-ωก`)
271 | (*^ ◇ ^*)
272 | ( ᐛ)パァ
273 | ლ(・ ิω・ิლ)
274 | (́>◞౪◟<‵)ノシ
275 | (*‘ v`*)
276 | ( ¯﹁¯ )
277 | (ヾノ・ω・`)
278 | (゚ω´)
279 | (σ回ω・)σ←↑→↓←↑
280 | キタ━━━━(゚∀゚)━━━━!!
281 | _(:3 」∠ )_
282 | (ノ◕ヮ◕)ノ*:・゚✧
283 | o(@//////@)o
284 | _(¦3」∠)_
285 | öㅅö
286 | (゚ロ ゚ノ)ノ
287 | (╯-_-)╯╧╧
288 | (一(oo)一)*
289 | (●▼●;)
290 | (╬゚д゚)▄︻┻┳═一
291 | (^u^)
292 | (`・ω・´)
293 | ( ′∀`)σ≡σ☆))Д′)
294 | ( ~'ω')~
295 | :;(∩´﹏`∩);:
296 | (≖ᴗ≖๑)
297 | (ㄏ ̄▽ ̄)ㄏ ㄟ( ̄▽ ̄ㄟ)
298 | <Φ﹏Φ>
299 | (ノ`⊿´)ノ
300 | (ノ・_・)ノ凹 ┣凹━凹━凹┫
301 | ╮(╯3╰)╭ ╮(╯_╰)╭ ╮(╯▽╰)╭ ╮( ̄▽ ̄)╭
302 | (・ω´・ )
303 | ヽ( ´ー)ノ
304 | Φ౪Φ
305 | (*ω*)
306 | ╭(╭(╭(ㄏ ̄□ ̄)ㄏ
307 | ฅ(๑*д*๑)ฅ!!
308 | ╮(╯▽╰")╭☆
309 | ╭(╭(╭ (ㄏ ̄△ ̄)ㄏ
310 | (́⊙◞౪◟⊙‵)
311 | (灬ºωº灬)
312 | -_-||
313 | Σ(゚Д゚;≡;゚д゚)
314 | (΄◉◞౪◟◉‵)
315 | ლ(`∀´ლ)
316 | (○‵△′)=○○○○( ̄#)3 ̄)
317 | (́。◞౪◟。‵)
318 | (○^~^○)
319 | ٩(。・ω・。)و
320 | (“ ̄▽ ̄)-o█ █o-( ̄▽ ̄”)/
321 | ┬┴┬┴┤(‧◇├┬┴┬
322 | d(・ω・d) 微分!(∫・ω・)∫ 積分!∂(・ω・∂) 偏微分!(∮・ω・)∮ 沿閉曲線的積分!(∬・ω・)∬ 重積分!∇(・ω・∇)梯度!∇・(・ω・∇・)散度!∇×(・ω・∇×)旋度!Δ(・ω・Δ)拉普拉斯!
323 | ╰(‵△′)╯
324 | (ಠ益ಠ)
325 | (´,_ゝ`)
326 | (・∀・)っ/凵
327 | (((゚Д゚;)))
328 | (╥﹏╥)
329 | (́〠◞౪◟〠‵)
330 | Ψ(≧◇≦)Ψ
331 | ( ≧◇≦ )
332 | (/ ↀ3ↀ)/q(﹌ω﹌ )
333 | ლ(╹◡╹ლ)
334 | └( ̄^ ̄ )┐
335 | (o´・ω・`)σ)Д`)
336 | (;д;)
337 | _(:3 ⌒゙)_
338 | ( ̄◇ ̄)凸
339 | XD
340 | ╮(╯﹏╰)╭
341 | (^y^)
342 | (╯ ‵Д′)╯╧───╧
343 | (σ′▽‵)′▽‵)σ
344 | ಠ益ಠ
345 | (´,,•ω•,,)♡
346 | (′□‵)╭~
347 | (/Д`)~゚。
348 | (o・e・)
349 | ( ╬◣ 益◢)y
350 | ∥∥<∥′ ◇‵ || >∥∥
351 | ≧▽≦
352 | ~( ̄0 ̄)~
353 | (´;ω;`)
354 | ( っค้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้c )
355 | ξ( ✿>◡❛)▄︻▇▇〓▄︻┻┳═一
356 | (´⊙ω⊙`)
357 | (≖_≖)✧
358 | ||Φ|(|´|Д|`|)|Φ||
359 | (∩^o^)⊃━☆゚.*・。
360 | o(‧/////‧)o
361 | (=´ω`=)
362 | Σ>―(〃°ω°〃)♡→
363 | (╯‵□′)╯︵┴─┴
364 | (*´・д・)?
365 | _(°ω°」 ∠)
366 | (・∀・)つ⑩
367 | (-ι_- )
368 | (́▣◞౪◟▣‵)
369 | ┬─┬ ノ( ' - 'ノ)
370 | ٩(ŏ﹏ŏ、)۶
371 | (# ̄▽ ̄)o╭╯ ☆╰╮o( ̄▽ ̄///)<
372 | ( శ 3ੜ)~♥
373 | (づ′▽`)づ
374 | ´-ω-)b
375 | o(*/////*)o
376 | \(≧ω≦)/
377 | (☉д⊙)
378 | (′ω‵)y-~
379 | (ΦωΦ)
380 | (́☜◞౪◟☞`)/
381 | (〒△〒)
382 | ξ( ✿>◡❛)
383 | (´◔ ₃ ◔`)
384 | (o ─_─)==O)。O。)
385 | (Π△Π)
386 | (=m=)
387 | o(⊙/////⊙)o
388 | (´_ゝ`)
389 | (°ཀ°)
390 | ๛ก(ー̀ωー́ก)
391 | (→ܫ←)
392 | ( ╯' - ')╯ ┻━┻
393 | (´◉㉨◉)
394 | (=゚Д゚=) ▄︻┻┳━ ·.`.`.`.
395 | (っ・Д・)っ
396 | 。:.゚ヽ(*´∀`)ノ゚.:。
397 | m(。≧エ≦。)m
398 | ( メ∀・)
399 | ( ・・)つ―{}@{}@{}-
400 | (◕ܫ◕)
401 | ( ͡° ͜ʖ ͡°)
402 | (〒◇〒)o~
403 | ヽ(́◕◞౪◟◕‵)ノ
404 | (◜◔。◔◝)
405 | ( ¤̴̶̷̤́ ‧̫̮ ¤̴̶̷̤̀ )
406 | (́◉◞౪◉*◉◉◞◕౪౪◟◕◉◉φ◉౪◞◉*‵)
407 | (*´д`)~♥
408 | Ỏ̷͖͈̞̩͎̻̫̫̜͉̠̫͕̭̭̫̫̹̗̹͈̼̠̖͍͚̥͈̮̼͕̠̤̯̻̥̬̗̼̳̤̳̬̪̹͚̞̼̠͕̼̠̦͚̫͔̯̹͉͉̘͎͕̼̣̝͙̱̟̹̩̟̳̦̭͉̮̖̭̣̣̞̙̗̜̺̭̻̥͚͙̝̦̲̱͉͖͉̰̦͎̫̣̼͎͍̠̮͓̹̹͉̤̰̗̙͕͇͔̱͕̭͈̳̗̭͔̘̖̺̮̜̠͖̘͓̳͕̟̠̱̫̤͓͔̘̰̲͙͍͇̙͎̣̼̗̖͙̯͉̠̟͈͍͕̪͓̝̩̦̖̹̼̠̘̮͚̟͉̺̜͍͓̯̳̱̻͕̣̳͉̻̭̭̱͍̪̩̭̺͕̺̼̥̪͖̦̟͎̻̰_Ỏ̷͖͈̞̩͎̻̫̫̜͉̠̫͕̭̭̫̫̹̗̹͈̼̠̖͍͚̥͈̮̼͕̠̤̯̻̥̬̗̼̳̤̳̬̪̹͚̞̼̠͕̼̠̦͚̫͔̯̹͉͉̘͎͕̼̣̝͙̱̟̹̩̟̳̦̭͉̮̖̭̣̣̞̙̗̜̺̭̻̥͚͙̝̦̲̱͉͖͉̰̦͎̫̣̼͎͍̠̮͓̹̹͉̤̰̗̙͕͇͔̱͕̭͈̳̗̭͔̘̖̺̮̜̠͖̘͓̳͕̟̠̱̫̤͓͔̘̰̲͙͍͇̙͎̣̼̗̖͙̯͉̠̟͈͍͕̪͓̝̩̦̖̹̼̠̘̮͚̟͉̺̜͍͓̯̳̱̻͕̣̳͉̻̭̭̱͍̪̩̭̺͕̺̼̥̪͖̦̟͎̻̰
409 | ヽ(`д´)ノ
410 | ლ(╯⊙ε⊙ლ╰)
411 | (੭ु´ ᐜ `)੭ु⁾⁾
412 | (๑´ڡ`๑)
413 | ฅ●ω●ฅ
414 | ( ̄ー ̄)
415 | (  ̄□ ̄)/ 敬禮!! <( ̄ㄧ ̄ ) <( ̄ㄧ ̄ )
416 | (`┌_┐´)
417 | ↖╭╰('(Ω)')╯
418 | (‾◡◝ )
419 | (((m -_-)m
420 | (ㄨ‵◇′)
421 | ( ◕‿‿◕ )
422 | (*☉౪⊙*)
423 | ( ´☣///_ゝ///☣`)
424 | Σ┗(@ロ@;)┛
425 | ○(#‵︿′ㄨ)○
426 | (◞≼●≽◟◞౪◟◞≼●≽◟)
427 | ( *¯ ³¯*)♡ㄘゅ
428 | ╰( ̄︶ ̄)╯
429 | (*´艸`*)
430 | 凸(▔︵▔#)
431 | ヽ(・×・´)ゞ
432 | Σ(lliд゚ノ)ノ
433 | (つд⊂)
434 | (//´/◒/`//)
435 | ( ^ิ౪^ิ)
436 | \(Π◇Π)
437 | >///<
438 | ( ´(00)`)
439 | (✧≖‿ゝ≖)
440 | (☍﹏⁰)
441 | (・∀・)ノ三G[__]コ
442 | (╬ಠิ益ಠิ)
443 | ⊂(°Д°⊂⌒`つ≡≡≡(´⌒;;;≡≡≡
444 | ლ(இ e இ`。ლ)
445 | (〒﹏〒)
446 | ( ゚∀゚)o彡゚
447 | |Д`)ノ⌒●~*
448 | (´>∀)人(´・ω・)ノヽ(・ε・*)人(-д-`)
449 | Σ(゚ω゚)
450 | ヾ(●゜▽゜●)♡
451 | \( ̄◇ ̄)/
452 | (o´罒`o)
453 | ╮(╯~╰" )╭
454 | (σ゚∀゚)σ゚∀゚)σ゚∀゚)σ
455 | (´-`).。oO
456 | (ノ∀`*)
457 | (๑≖ิิ益≖ิ๑)
458 | ☝( ◠‿◠ )☝
459 | Σ( ° △ °)
460 | (Π___Π)
461 | \(●´ϖ`●)/
462 | ε(*´・∀・`)з゙
463 | (⁰⊖⁰)
464 | (#゚⊿`)凸
465 | (๑ơ ₃ ơ)♥
466 | (́=◞౪◟=‵)
467 | ╰(‵□′)╯
468 | (-`ェ´-╬)
469 | どこ━━━━(゜∀゜三゜∀゜)━━━━!!??
470 | (;´༎ຶД༎ຶ`)
471 | (。>﹏<)哈啾
472 | ლ(^ω^ლ)
473 | 川‧◇‧川
474 | (╯◕◞౪◟◕‵)╯︵ ┴─┴
475 | ( ゚ Χ ゚)
476 | (っ●ω●)っ
477 | (╬゚◥益◤゚)
478 | ٩(๑•̀ω•́๑)۶
479 | ↁ_ↁ
480 | il||li _| ̄|○ヽ(・ω・`)
481 | (´A`。)
482 | =(─(oo)─)=
483 | (♛‿♛)
484 | ʅ(´◔౪◔)ʃ
485 | m9(^Д^)プギャー
486 | (ÒܫÓױ)
487 | ╮(′~‵〞)╭
488 | (☄◣ω◢)☄
489 | ヽ(`Д´)ノ
490 | (╬゚ ◣ ゚)
491 | (σ゚∀゚)σ..:*☆
492 | (◔౪◔)
493 | (#`皿´)
494 | (。◕∀◕。)
495 | ヾ(◎´・ω・`)ノ
496 | ತಎತ
497 | (๑´ㅁ`)
498 | (´・Å・`)
499 | (((゚д゚)))
500 | ( ´థ౪థ)σ
501 | ─=≡Σ((( つ•̀ω•́)つ
502 | (´;ω;`)ヾ(・∀・`)
503 | (つ> _◕)つ︻╦̵̵͇̿̿̿̿╤───
504 | (´///☁///`)
505 | ε٩(๑> ₃ <)۶з
506 | Σ(*゚д゚ノ)ノ
507 | (゚д゚≡゚д゚)
508 | (́இ◞౪◟இ‵)
509 | ⇎_⇎
510 | (゚3゚)~♪
511 | |||||╭(_◇_)╮|||||
512 | (✘﹏✘ა)
513 | (´=ิ益=ิ`)
514 | ლ(•̀ _ •́ ლ)
515 | (*^﹏^*)
516 | (❁´ω`❁)*✲゚*
517 | (っ´ω`c)
518 | ☆^(o´Ф∇Ф)o
519 | レ(゚∀゚;)ヘ=З=З=З
520 | (*^ー^)
521 | (⊜‿⊜✴)
522 | (╯^◇^)╯
523 | ლ(◉◞౪◟◉ )ლ
524 | இдஇ
525 | ( *‵Д′)--●(#)‵⊿′)╮
526 | \( ̄ ◇,  ̄)/
527 | (ㆆᴗㆆ)
528 | (*゚ー゚)
529 | ( `皿´)ノ"鹵=巛巛巛巛.:;:
530 | (〃 ̄ω ̄)人( ̄︶ ̄〃)
531 | σ`∀´)σ
532 | -{}@{}@{}-υ( ̄◇ ̄*)
533 | (・`ω´・)
534 | (//´◜◞⊜◟◝`////)
535 | (≧∀≦)
536 | (;´д`).。oO(・・・・)
537 | ∑( ̄□ ̄;)
538 | (ง๑ •̀_•́)ง
539 | (;´゚ω゚`人)
540 | ╮/(>▽<)人(>▽<)╭
541 | ╩══╩ ~ ╰( ̄△ ̄╰)
542 | (╭ ̄3 ̄)╭♡
543 | ╮(﹀ω﹀")╭
544 | ( ՞ټ՞)
545 | _(┐「ε:)_
546 | ((((<||_◇_>
547 |  ̄﹃ ̄
548 | #゚Å゚)⊂彡☆))゚Д゚)・∵
549 | ( ▔︿▔)╭~
550 | ( ¯•ω•¯ )
551 | |||╭(_◇_)╮|||
552 | ( ゚Å゚)
553 | Σ(゚д゚lll)
554 | (´゚д゚`)
555 | Σ(・ω・ノ)ノ
556 | ლ(•ω •ლ)
557 | ^(˙oo˙)^
558 | ヽ(✿゚▽゚)ノ
559 | (´∩ω∩`)
560 | (*^@^*)
561 | (・ε・)
562 | ლↂ‿‿ↂლ
563 | (´◉‿◉`)
564 | ◔"L__◔
565 | ◝( ゚∀ ゚ )◟
566 | q(〒□〒)p
567 | ◢▆▅▄▃崩╰(〒皿〒)╯潰▃▄▅▇◣
568 | ◢▆▅▄▃ 崩╰(〒皿〒)╯潰 ▃▄▅▆◣
569 | ε≡ヘ( ´∀`)ノ
570 | (;̦̦̦̦̦̦̦̦ↂ⃝⃓⃙⃚⃘дↂ⃝⃓⃙⃚⃘;̦̦̦̦̦̦̦̦)
571 | (´σ-`)
572 | (゚皿゚メ)
573 | (^ρ^)/
574 | 。・゚・(ノД`)ヽ(゚Д゚ )秀秀
575 | ( ఠൠఠ )ノ
576 | _:(´□`」 ∠):_
577 | <(‵□′)>
578 | ლ(´∀`ლ)
579 | (✺ω✺)
580 | (๑•ૅω•´๑)
581 | ┌(┌^o^)┐
582 | (́순◞౪◟순‵)
583 | (# ̄▽ ̄)o╭╯ ★
584 | Σ(`L_` )
585 | ^( ̄(oo) ̄)^
586 | ლ(^o^ლ)
587 | (/‵Д′)/~ ╧╧
588 | (*´Д`)つ))´∀`)
589 | ( ☉_☉)≡☞o────★°
590 | ლ(́◉◞౪◟◉‵ლ)
591 | ┏┛學校┗┓λλλλλ
592 | (*´д`)
593 | (V)(́◉◞౪◟◉‵)(V)
594 | Σ Σ Σ (」○ ω○ )/
595 | ヾ(´ε`ヾ)
596 | 0(:3 )~ ('、3_ヽ)_
597 | =└(┐卍^o^)卍
598 | (;゚д゚)
599 | (*ˇωˇ*人)
600 | ( ♥д♥)
601 | ☮▁▂▃▄☾ ♛ ◡ ♛ ☽▄▃▂▁☮
602 | /(′□‵)\
603 | ( º﹃º )
604 | ( ´థ,_‥థ`)b
605 | ( Φ ω Φ )
606 | థ౪థ
607 | (*-*)y~
608 | (゚∀゚)
609 | |柱|ω・`)
610 | (́つ◞౪◟⊂‵)
611 | ( ´-ω ・)▄︻┻┳══━
612 | \(●o○;)ノ
613 | >(˙ω˙)<
614 | d(d'∀')
615 | (´^ิ益^ิ` )
616 | ( ´・ω)
617 | =_=
618 | \( ̄□ ̄)/
619 | (∂ω∂)
620 | (థฺˇ౪ˇథ)
621 | (✪ω✪)
622 | (σ ─.─)σ
623 | └|‧◇‧|┐
624 | |////| ( )ノ |////|(自動門
625 | 〠_〠
626 | (`皿´)
627 | 。゜+.(人-ω◕)゜+.゜
628 | ╮( ̄◇ ̄)╭
629 | _(√ ζ ε:)_
630 | (´ー`)y─┛
631 | ༼ つ/̵͇̿̿/’̿’̿ ̿ ̿̿ ̿̿◕ _◕ ༽つ/̵͇̿̿/’̿’̿ ̿ ̿̿ ̿̿ ̿̿
632 | (`д´)
633 | (ノДT)
634 | (¯ □ ¯)
635 | /(╯▽╰")\
636 | (ノД`)
637 | ( ◜◡‾)
638 | o(─//////─)o
639 | (:3っ)へ ヽ(´Д`●ヽ)
640 | (#‵)3′)▂▂▂▃▄▅~~~嗡嗡嗡嗡嗡
641 | ☆⌒(*^-゜)v
642 | ╮( ̄(oo) ̄)╭
643 | (´≖◞౪◟≖)
644 | (´∀`∩)↑↑
645 | ( ˘•ω•˘ )◞⁽˙³˙⁾
646 | ( ×ω× )
647 | (#`Д´)ノ
648 | ლ(❛◡❛✿)ლ
649 | -//(ǒ.ǒ)//-
650 | (́◉◞౪◟◉‵)
651 | ━( ิ◕㉨◕ ิ)━
652 | ╮( ̄□ ̄)╭
653 | (́一◞౪◟一‵)
654 | ヾ(;゚(OO)゚)ノ
655 | q( ̄ε ̄ )p
656 | (́◕◞ㆀ◟◕`)
657 | ╮(╯▽╰)╭
658 | ⊙谷⊙
659 | (๑థิ益థิ)
660 | <(‵△′)>
661 | ლ(゚д゚ლ)
662 | (╬⓪益⓪)
663 | ♥(´∀` )人
664 | ∪( ̄﹁ ̄)∪
665 | (・ิ㉨・ิ)
666 | (  ̄ 3 ̄)y▂ξ
667 | ┏((= ̄(エ) ̄=))┛
668 | ≧〔゜゜〕≦
669 | =͟͟͞͞( •̀д•́)
670 | (,_ゝ۞)
671 | (つД`)
672 | \( ̄□ ̄*)/
673 | (*─︵─)====Co( 一//////一)o
674 | \(⊙O⊙)/
675 | σ(‥)
676 | 。゚(゚´ω`゚)゚。
677 | ( ´∀`)
678 | (ゝ∀・)⌒☆
679 | (`3´)
680 | ( ˙灬˙ )
681 | (o゚ω゚o)
682 | (*゜ー゜)b
683 | ╰(〒○〒)╯
684 | (っ﹏-) .。o
685 | ✧(순_순)"
686 | ╯-____-)╯~═╩════╩═
687 | (●`・(エ)・´●)
688 | (。◇。||)
689 | ╮(╯_╰)╭
690 | Σヽ(゚Д ゚; )ノ
691 | ( ΄◞ิ .̫.̫ ◟ิ‵)y
692 | (#▔皿▔)─<(*—_____–)
693 | ( っธิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิ์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์ლ)
694 | (。´∀`)ノ―⊂ZZZ⊃
695 | ( ิΘ㉨Θ)ิ
696 | (´・д・`)ハ(・д・`*)
697 | (╯°O°)╯┻━┻
698 | σ ゚∀ ゚) ゚∀゚)σ
699 | (⌒o⌒)
700 | (´-ι_-`)
701 | ( ・O・)ノ⌒●
702 | *ଘ(੭*ˊᵕˋ)੭* ੈ✩‧₊˚
703 | (*´∀`)~♥
704 | (,,・ω・,,)
705 | (゜o゜(☆○=(-_- )
706 | ( ^ω^)
707 | (〒__〒)
708 | ┏(_д_┏)┓))
709 | (◓Д◒)✄╰⋃╯
710 | ヾ(;゚;Д;゚;)ノ゙
711 | (д) ゚゚
712 | | 柱 |┐( ̄◇ ̄*)
713 | o(.〝.)o
714 | (ㄒ◇ㄒ)o~
715 | ก็ʕ•͡ᴥ•ʔ ก้
716 | <(‵︵′)>
717 | (╯#▔__▔)╯┴┴\(" ̄□ ̄)
718 | ﹏﹏(ㄏ ̄△ ̄)ㄏ
719 | ( ˘・з・)
720 | (╯°▽°)╯ ┻━┻
721 | \(" ̄▽ ̄)/
722 | (•ө•)
723 | ~( ̄▽ ̄)~
724 | ( ิ◕㉨◕ ิ)
725 | ┴┴~╰(‵□′#)
726 | (:3[__]4
727 | (›´ω`‹ )
728 | ( ´Д`)y━・~~
729 | ◑▂◐
730 | (°ロ°٥)
731 | (・∀・)
732 | (。ŏ_ŏ)
733 | (ノ>ω<)ノ
734 | ( ゚∀゚) ノ♡
735 | ヽ(●´ε`●)ノ
736 | •✞_✞•
737 | (ゝ∀・)b
738 | (๑• . •๑)
739 | ◉ื益◉ื
740 | (╯ ‵□′)╯
741 | (´;;◉;⊖;◉;;`)
742 | (。•ㅅ•。)♡
743 | (ㅅ˘ㅂ˘)
744 | ٩(๑´3`๑)۶
745 | (´ΘωΘ`)
746 | (≖‿ゝ≖)✧
747 | ( ̄y▽ ̄)╭
748 | ,,Ծ‸Ծ,,
749 | σ(´∀`*)
750 | (´▽`ʃ♡ƪ)
751 | O-(/// ̄皿 ̄)☞ ─═≡☆゜★█▇▆▅▄▃▂_
752 | ʕ•̫͡•ʔ
753 | o(‧""‧)o
754 | (`∀´)
755 | ⁽⁽ ◟(∗ ˊωˋ ∗)◞ ⁾⁾
756 | (◍•ᴗ•◍)ゝ
757 | (*’ー’*)
758 | ( σ՞ਊ ՞)σ
759 | ( ˘•ω•˘ ).oOஇ
760 | ( ̄□ ̄||)
761 | (●` 艸 ´)
762 | (ಥ_ಥ)
763 | ⋉(● ∸ ●)⋊
764 | ✧*。٩(ˊᗜˋ*)و✧*。
765 | ( ´•̥̥̥ω•̥̥̥` )
766 | \(^ω^)/
767 | 癶(癶;:゜;益;゜;)癶
768 | (●´ω`●)ゞ
769 | (o゚ロ゚)┌┛Σ(ノ´*ω*`)ノ
770 | \(╯﹏╰)
771 | |柱|ω゚)---c<` д゚)!
772 | (๑´ㅂ`๑)
773 | (╯‵□′)╯︵┴─┴
774 | ╮(▔ε▔)╭
775 | 凸( ̄皿 ̄*)
776 | (゚々。)
777 | ヾ(>Д<;)))).....
778 | ヾ(:3ノシヾ)ノシ 三[____]
779 | (# ̄▽ ̄#)
780 | <( ̄︶ ̄)/
781 | <( ̄3 ̄)>
782 | づ(・ω・)づ
783 | い・ω・づ*
--------------------------------------------------------------------------------
/jseg/Emodet/emodata.json:
--------------------------------------------------------------------------------
1 | ["(\uff92\uff9f\u0414\uff9f)\uff92", "Zz(\u00b4-\u03c9-`*)", "(\ua4aa\u0366\u1d17\u0335\u030d\ua4aa\u0366 )", "( *\u00b4\u25d2`*)", "(\u2661\u02d9\ufe36\u02d9\u2661)", "(( \u3078(\u3078\u00b4\u2200`)\u3078", "(\u00f3\u3268\u00f2)", "( \u00b4\u2200`);y=\uff70(\uff9f\u0434\uff9f)\uff65\u2235. \uff80\uff70\uff9d", "\uff9f\u00c5\uff9f)", "(\u0301\u63d0\u25de\u0c6a\u25df\u4f9b\u2035)", "0", "(((o(*\uff9f\u25bd\uff9f*)o)))", "\u256e(\u256f\u2200\u2570)\u256d", "\u3002\uff65\uff9f\uff65(\u3064\u0434`\uff9f)\uff65\uff9f\uff65", "(^_\u3063^)", "(\u3000\u25de\u227c\u2638\u227d\u25df ._\u309d\u25de\u227c\u2638\u227d\u25df)", "( 3\u03c93)", "(\uff62\uff65\u03c9\uff65)\uff62", "(`\u3078\u00b4\u2260)", "( \u2022 \u0300\u03c9\u2022\u0301 )", "d(`\uff65\u2200\uff65)b", "(*\uff9f\u2200\uff9f*)", "\uff08 \u00b4\uff9f,_\u300d\uff9f\uff09\uff8a\uff9e\uff76\uff7c\uff9e\uff6c\uff88\uff70\uff89", "(\uff61\uff65\u3268\uff65\uff61)", "(\u00b4\u30fb\u03c9\u30fb)\u3064\u65e6", "\u2510(\u00b4\u0434`)\u250c", "(\uff9f\u0434\u2299)", "_(\u00b4\u0f40`\u300d \u2220)_ ", "(\u256c\u2609\u0434\u2299)", "\u03a3(;\uff9f\u0434\uff9f)", "(\uffe3(\u30a8)\uffe3)", "(\u00b4\u30fb\u03c9\u30fb`)", "(\u309d\u2200\uff65)\u2312\u2606", "\uff61\uff9f\u30fd(\uff9f\u00b4\u0414`)\uff89\uff9f\uff61", "\u2211(\u03b9\u00b4\u0414\u30f3)\u30ce", "\u2661(*\u00b4\u2200\uff40*)\u4eba(*\u00b4\u2200\uff40*)\u2661", "-`\u0434\u00b4-", "(\u309d\u2200\uff65)", "\uff08 \u00b4\u2623///_\u309d///\u2623\uff40\uff09", "(\u3012\ufe3f\u3012)", "(*\u2265\u25bd\u2264)\u30c4\u250f\u2501\u2513", "\u10da\uff5c\uff3e\u0414\uff3e\u10da\uff5c", "( \u02d8\u2022\u03c9\u2022\u02d8 )", "(\u0c36\u03c9\u0c36)", "( \u055e\u067c\u055e)", "\u30fb\u309c\u30fb(P\u0414`q\uff61)\u30fb\u309c\u30fb", " (//\u00b4/\u25d2/`//)", " (\u260d\ufe4f\u2070) ", "\u25d1\u03c9\u25d0", "(<\u309d\u03c9\u30fb) \u7dba\u7f85\u661f\u2606", " \u30fe(\u00b4\ufe36`*)\uff89\u266c", "\u250f( .-. 
\u250f ) \u2513", "(\u00b4c_`)", "\u2282\u5f61\u2606))\u0434`)", "(\u251b`\u0434\u00b4)\u251b", "\uff0f/( \u25d5\u203f\u203f\u25d5 )\uff3c", "\u1555 ( \u141b ) \u1557", "\u03c1(\u30fb\u03c9\u30fb\u3001)", "(\uff61-_-\uff61) ", "\u25dd(\u3000\uff9f\u2200 \uff9f )\u25df", "\u10da(\u0301\u25d5\u25de\u0c6a\u25df\u25d5\u2035\u10da)", "(\u0e51\u2022\u0301 \u2083 \u2022\u0300\u0e51)", "\uff3c\uff3c\\\\\u0669( '\u03c9' )\u0648 //\uff0f\uff0f", " ( \u00b4\uff9f\u0414\uff9f`)", "( \u2018\u0434\u2018\u2282\u5f61\u2606))\u0414\u00b4)", "\u10da(\u00b4\u2022\u0434\u2022 \u0300\u10da", "(\u2022\u04e9\u2022)", "\u03a3(\uff9f\u0434\uff9f)", "<*)) >>=<", "(\u2022\u3142\u2022)/", "\u30fe(*\u03a6\u03c9\u03a6)\u30c4", "(\u3000\uff9f\u2200\uff9f)\u3064\u2261\u2261\u2261\u2661\u2661\u2661)`\u03bd\u309c)\uff78\uff9e\uff7c\uff6c", "\u03b5=\u03b5=\u30fe(;\uff9f\u0434\uff9f)/", "_(\u2510\u300c\ufe43\uff9f\uff61)_", "o(\u2606\u0424\u2207\u0424\u2606)o", "(*\u00b4\u03c9`)\u4eba(\u00b4\u03c9`*)", "\u2533\u2501\u2533\u30ce( O\u03c9O\u30ce)", "~(\uff5eo\uffe3\u25bd\uffe3)\uff5eo.....o\uff5e(\uff3f\u25b3\uff3fo\uff5e)~..", "(\uff9f\u22bf\uff9f)", "(:\u25ce)\u2261", "\u2533\u2501\u2533\u30ce( ' - '\u30ce) ", "(\u00b4\uff5e`)", "(\u00a63[\u2593\u2593]\t", "(\u275b\u25e1\u275b\u273f)", "(\u00b4-\u03c9-\uff40)", "\u1566(\u00f2_\u00f3\u02c7)\u1564", "\u208d\u208d\u25dd(\uff65'\u03c9'\uff65)\u25df\u207e\u207e ", "( \u00ba\u0394\u00ba )", "(=\u00b4\u1d25`)", ": \u2661\uff61\uff9f.(*\u2661\u00b4\u25e1` \u4eba\u00b4\u25e1` \u2661*)\uff9f\u2661 \u00b0\u30fb", "(*\u00b4\u8278`*)", "(\u25d4\u2296\u25d4)\u3064", "(,,\uff9f\u0414\uff9f)", "\u2044(\u2044 \u2044\u2022\u2044\u03c9\u2044\u2022\u2044 \u2044)\u2044", "( \u0e34\u25d5\u3268\u25d5 \u0e34)", "\u10da(\u30fb\u00b4\uff6a`\u30fb\u10da)", "(\u2070\u25bf\u2070)", "(\uff923[____]", "\u30fd(\u309c\u25bd\u309c\u3000)\uff0dC<(/;\u25c7;)/~[\u62d6\u8d70] ", "(\uffeb\u072b\uffe9)", "(\u2267\u2200\u2266)\u309e", "(\u00b4\u0434((\u2606\u30dfPia!\u2282\u25bc(\uff4f 
\u2035\uff0d\u2032\uff4f)\u25bc\u3064Pia!\u5f61\u2605))\u2200`)", "(\u25cf\uff40\uff65(\uff74)\uff65\u00b4\u25cf)", "\u2727\u25dd(\u2070\u25bf\u2070)\u25dc\u2727", "<(_ _)>", "(\uff61A\uff61)", "(\u0e51\u2022\u0300\u3142\u2022\u0301)\u0648\u2727", "(\u261e\uff9f\u2200\uff9f)\uff9f\u2200\uff9f)\u261e", "(\u25cf\u0431\u4eba<\u25cf)", "((\u2514(:3\u300d\u250c)\u2518))", " ( \u2022\u0301 _ \u2022\u0300)\uff1f", "(\u0e51\u00af\u2200\u00af\u0e51)", "(\u25de\u2038\u25df)", "\u30fd(\u32a4V\u32a4*)\uff89", "\u03a3(\u00b0\u0414\u00b0;", "\u25e5(\u0e05\u00ba\uffe6\u00ba\u0e05)\u25e4", "(\u2018\u2299\u0434-)", " \u10da( \u2022 \u0300\u03c9\u2022\u0301 )\u3063", "(\u300d\u30fb\u03c9\u30fb)\u300d\u3046\u30fc\uff01(\uff0f\u30fb\u03c9\u30fb)\uff0f\u306b\u3083\u30fc\uff01", "(|||\uff9f\u0434\uff9f)", "(\u25b0\u02d8\u25e1\u02d8\u25b0)", "\u01aa(\u2022\u0303\u0361\u03b5\u2022\u0303\u0361)\u222b", "\uff08\u2019\u3078\u2019\uff09", "(\u256f\u00b0\u0414\u00b0)\u256f \u253b\u2501\u253b", "\uff9a(\uff9f\u2200\uff9f;)\uff8d=\u0417=\u0417=\u0417", "(\uff9f\u2200\u3002)", "\u30fd(\u2200\uff9f )\u4eba(\uff9f\u2200\uff9f)\u4eba( \uff9f\u2200)\u4eba(\u2200\uff9f )\u4eba(\uff9f\u2200\uff9f)\u4eba( \uff9f\u2200)\uff89", "(\u00b4\uff65\u0434\uff65\uff40)\uff8a(\uff65\u0434\uff65\uff40*)", "\u30fd( \u00b0 \u25bd\u00b0)\u30ce", "(\u25cf\u2070\u0c6a\u2070\u25cf)", "(\u274d\u1d25\u274d\u028b)", "..._\u3006(\u00b0\u25bd\u00b0*)", "(\u0384\u0ca2\u25de\u0c6a\u25df\u0ca2\u2035)\u25c9\u25de\u0c6a\u25df\u25c9)", "(\uff92 \uff9f\u76bf\uff9f)\uff92", "(*\u00b4\u0437\uff40*)", "\u03b5\u2261(\u30ce\u00b4\uff3f\u309d\uff40\uff09\u30ce", "(\u3003\u2200\u3003)", "\u0ca0_\u0ca0", "\u30fd(=^\uff65\u03c9\uff65^=)\u4e3f", "(\uff61\u00ed _ \u00ec\uff61)", "( \u00b4\u2200\uff40)\u3064t[ ]", "(\u2727\u2256\u203f\u309d\u2256)", " (\u3000\u00b4\u30fb\u25e1\u30fb\uff40)", "(:3[___]=", "(\uff61\u30fb\u03c9\u30fb\uff61)", "\uff08\u00b4\u25d4\u200b\u2200\u25d4`)", " (\u2022\u203e\u2323\u203e\u2022)", 
"....\u02ca\u02cb------\uff61:.\uff9f_\u30fd(_\u00b4\u2200`_)\uff89_.:\uff61((\u6d6e\u6c34", "\uff65*\uff65:\u2261(\u3000\u03b5:)", "\u9b54\u8cab\u5149\u6bba\u7832(\uff9f\u0414\uff9f)\u03c3\u250100000000000\u2501\u25cf", "( \u00b4\uff65\uff65)\uff89(._.`)", "\u30fd(#`\u0414\u00b4)\uff89", "\uff3f\uff89\u4e59(\uff64\uff9d\uff64)\uff3f", "(\u00b4-\u03c9\u0e01`)", "(\u3000\u141b)\u30d1\u30a1", "(*\u2018 v`*)", "(\u30fe\uff89\uff65\u03c9\uff65`)", "(\uff9f\u03c9\u00b4)", "(\u03c3\u56de\u03c9\u30fb)\u03c3\u2190\u2191\u2192\u2193\u2190\u2191", "(\u00b4\u25d3\u0414\u25d4`)", "\uff77\uff80\u2501\u2501\u2501\u2501(\uff9f\u2200\uff9f)\u2501\u2501\u2501\u2501!!", "(\u00b4>\u2200)\u4eba(\u00b4\u30fb\u03c9\u30fb)\uff89\u30fd(\u30fb\u03b5\u30fb*)\u4eba(-\u0434-`)", "(\uff89\u25d5\u30ee\u25d5)\uff89*:\uff65\uff9f\u2727", "_(\u00a63\u300d\u2220)_", "\u00f6\u3145\u00f6", "(\u25cf\u25bc\u25cf;)", "(\u256c\uff9f\u0434\uff9f)\u2584\ufe3b\u253b\u2533\u2550\u4e00", "(^u^)", "(`\u30fb\u03c9\u30fb\u00b4)", "(\u3000\u2032\u2200`)\u03c3\u2261\u03c3\u2606))\u0414\u2032)", "( \uff5e'\u03c9')\uff5e", ":;(\u2229\u00b4\ufe4f`\u2229);:", "(\u2256\u1d17\u2256\u0e51)", "(\u310f\uffe3\u25bd\uffe3)\u310f \u311f(\uffe3\u25bd\uffe3\u311f)", "(\u30ce\u30fb\uff3f\u30fb)\u30ce\u51f9 \u2523\u51f9\u2501\u51f9\u2501\u51f9\u252b", "(\uff65\u03c9\u00b4\uff65 )", "\u03a6\u0c6a\u03a6", "\u0e05(\u0e51*\u0434*\u0e51)\u0e05!!", "\u03a3(\uff9f\u0414\uff9f\uff1b\u2261\uff1b\uff9f\u0434\uff9f)", "\u0669(\uff61\u30fb\u03c9\u30fb\uff61)\ufeff\u0648", "(\u201c\uffe3\u25bd\uffe3)-o\u2588 \u2588o-(\uffe3\u25bd\uffe3\u201d)/", "d(\u30fb\u03c9\u30fbd) \u5fae\u5206\uff01(\u222b\u30fb\u03c9\u30fb)\u222b \u7a4d\u5206\uff01\u2202(\u30fb\u03c9\u30fb\u2202) \u504f\u5fae\u5206\uff01(\u222e\u30fb\u03c9\u30fb)\u222e \u6cbf\u9589\u66f2\u7dda\u7684\u7a4d\u5206\uff01(\u222c\u30fb\u03c9\u30fb)\u222c 
\u91cd\u7a4d\u5206\uff01\u2207(\u30fb\u03c9\u30fb\u2207)\u68af\u5ea6\uff01\u2207\u30fb(\u30fb\u03c9\u30fb\u2207\u30fb)\u6563\u5ea6\uff01\u2207\u00d7(\u30fb\u03c9\u30fb\u2207\u00d7)\u65cb\u5ea6\uff01\u0394(\u30fb\u03c9\u30fb\u0394)\u62c9\u666e\u62c9\u65af\uff01", "(\u0ca0\u76ca\u0ca0)", "(\u00b4,_\u309d`)", "(((\uff9f\u0414\uff9f;)))", "(\u2565\ufe4f\u2565)", "(/ \u21803\u2180)/q(\ufe4c\u03c9\ufe4c )", "\u10da(\u2579\u25e1\u2579\u10da)", "(o\u00b4\u30fb\u03c9\u30fb`)\u03c3)\u0414`)", "_(:3 \u2312\uff9e)_", "(^y^)", "(\u03c3\u2032\u25bd\u2035)\u2032\u25bd\u2035)\u03c3", "(\u00b4,,\u2022\u03c9\u2022,,)\u2661", "\u261d( \u25e0\u203f\u25e0 )\u261d", "(\u00b4;\u03c9;`)", "(\u3000\u3063\u0e04\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49\u0e49c )", "\u03be( \u273f\uff1e\u25e1\u275b)\u2584\ufe3b\u2587\u2587\u3013\u2584\ufe3b\u253b\u2533\u2550\u4e00", "(\u00b4\u2299\u03c9\u2299`)", "(\u2256\uff3f\u2256)\u2727", "||\u03a6|(|\u00b4|\u0414|`|)|\u03a6||", "O-(///\uffe3\u76bf\uffe3)\u261e \u2500\u2550\u2261\u2606\u309c\u2605\u2588\u2587\u2586\u2585\u2584\u2583\u2582\uff3f\u3000", "( \u00b4\u0c25\u0c6a\u0c25\uff09\u03c3", "(\u25d3\u0414\u25d2)\u2704\u2570\u22c3\u256f", "(\u256f\u2035\u25a1\u2032)\u256f\ufe35\u2534\u2500\u2534", "(*\u00b4\uff65\u0434\uff65)?", "_(\u00b0\u03c9\u00b0\uff63 \u2220)", "(\u30fb\u2200\u30fb)\u3064\u2469", "\u252c\u2500\u252c \u30ce( ' - '\u30ce) ", "\u0669(\u014f\ufe4f\u014f\u3001)\u06f6", "(\u00b4\uff9f\u0434\uff9f`)", 
"\u25e2\u2586\u2585\u2584\u2583\u5d29\u2570(\u3012\u76bf\u3012)\u256f\u6f70\u2583\u2584\u2585\u2587\u25e3", "\u00b4-\u03c9-)b", "(\u2609\u0434\u2299)", "(\u309c\u76bf\u3002)", "(\u03a6\u03c9\u03a6)", "\u03b5(*\u00b4\uff65\u2200\uff65\uff40)\u0437\uff9e", "\uff08\u00b4\u25d4 \u2083 \u25d4`)", "(=m=)", "(\u00b4_\u309d`)", "(\u00b0\u0f40\u00b0)", "\u0e5b\u0e01(\uff70\u0300\u03c9\uff70\u0301\u0e01)", "\u2727*\uff61\u0669(\u02ca\u15dc\u02cb*)\u0648\u2727*\uff61", "( \u256f' - ')\u256f \u253b\u2501\u253b ", "\u0e01\u0e47\u0295\u2022\u0361\u1d25\u2022\u0294 \u0e01\u0e49", "(\u3063\u30fb\u0414\u30fb)\u3063", "\uff61:.\uff9f\u30fd(*\u00b4\u2200`)\uff89\uff9f.:\uff61", "m(\uff61\u2267\uff74\u2266\uff61)m", "( \uff92\u2200\u30fb)", " ( \u30fb\u30fb)\u3064\u2015{}@{}@{}-", "(\u25d5\u072b\u25d5)", "( \u0361\u00b0 \u035c\u0296 \u0361\u00b0)", " (\u25dc\u25d4\u3002\u25d4\u25dd)", " ( \u00a4\u0334\u0336\u0337\u0324\u0301 \u2027\u032b\u032e \u00a4\u0334\u0336\u0337\u0324\u0300 )", "(*\u00b4\u0434`)~\u2665", 
"\u1ece\u0337\u0356\u0348\u031e\u0329\u034e\u033b\u032b\u032b\u031c\u0349\u0320\u032b\u0355\u032d\u032d\u032b\u032b\u0339\u0317\u0339\u0348\u033c\u0320\u0316\u034d\u035a\u0325\u0348\u032e\u033c\u0355\u0320\u0324\u032f\u033b\u0325\u032c\u0317\u033c\u0333\u0324\u0333\u032c\u032a\u0339\u035a\u031e\u033c\u0320\u0355\u033c\u0320\u0326\u035a\u032b\u0354\u032f\u0339\u0349\u0349\u0318\u034e\u0355\u033c\u0323\u031d\u0359\u0331\u031f\u0339\u0329\u031f\u0333\u0326\u032d\u0349\u032e\u0316\u032d\u0323\u0323\u031e\u0319\u0317\u031c\u033a\u032d\u033b\u0325\u035a\u0359\u031d\u0326\u0332\u0331\u0349\u0356\u0349\u0330\u0326\u034e\u032b\u0323\u033c\u034e\u034d\u0320\u032e\u0353\u0339\u0339\u0349\u0324\u0330\u0317\u0319\u0355\u0347\u0354\u0331\u0355\u032d\u0348\u0333\u0317\u032d\u0354\u0318\u0316\u033a\u032e\u031c\u0320\u0356\u0318\u0353\u0333\u0355\u031f\u0320\u0331\u032b\u0324\u0353\u0354\u0318\u0330\u0332\u0359\u034d\u0347\u0319\u034e\u0323\u033c\u0317\u0316\u0359\u032f\u0349\u0320\u031f\u0348\u034d\u0355\u032a\u0353\u031d\u0329\u0326\u0316\u0339\u033c\u0320\u0318\u032e\u035a\u031f\u0349\u033a\u031c\u034d\u0353\u032f\u0333\u0331\u033b\u0355\u0323\u0333\u0349\u033b\u032d\u032d\u0331\u034d\u032a\u0329\u032d\u033a\u0355\u033a\u033c\u0325\u032a\u0356\u0326\u031f\u034e\u033b\u0330_\u1ece\u0337\u0356\u0348\u031e\u0329\u034e\u033b\u032b\u032b\u031c\u0349\u0320\u032b\u0355\u032d\u032d\u032b\u032b\u0339\u0317\u0339\u0348\u033c\u0320\u0316\u034d\u035a\u0325\u0348\u032e\u033c\u0355\u0320\u0324\u032f\u033b\u0325\u032c\u0317\u033c\u0333\u0324\u0333\u032c\u032a\u0339\u035a\u031e\u033c\u0320\u0355\u033c\u0320\u0326\u035a\u032b\u0354\u032f\u0339\u0349\u0349\u0318\u034e\u0355\u033c\u0323\u031d\u0359\u0331\u031f\u0339\u0329\u031f\u0333\u0326\u032d\u0349\u032e\u0316\u032d\u0323\u0323\u031e\u0319\u0317\u031c\u033a\u032d\u033b\u0325\u035a\u0359\u031d\u0326\u0332\u0331\u0349\u0356\u0349\u0330\u0326\u034e\u032b\u0323\u033c\u034e\u034d\u0320\u032e\u0353\u0339\u0339\u0349\u0324\u0330\u0317\u0319\u0355\u0347
\u0354\u0331\u0355\u032d\u0348\u0333\u0317\u032d\u0354\u0318\u0316\u033a\u032e\u031c\u0320\u0356\u0318\u0353\u0333\u0355\u031f\u0320\u0331\u032b\u0324\u0353\u0354\u0318\u0330\u0332\u0359\u034d\u0347\u0319\u034e\u0323\u033c\u0317\u0316\u0359\u032f\u0349\u0320\u031f\u0348\u034d\u0355\u032a\u0353\u031d\u0329\u0326\u0316\u0339\u033c\u0320\u0318\u032e\u035a\u031f\u0349\u033a\u031c\u034d\u0353\u032f\u0333\u0331\u033b\u0355\u0323\u0333\u0349\u033b\u032d\u032d\u0331\u034d\u032a\u0329\u032d\u033a\u0355\u033a\u033c\u0325\u032a\u0356\u0326\u031f\u034e\u033b\u0330", "\u10da(\u256f\u2299\u03b5\u2299\u10da\u2570)", " (\u0a6d\u0941\u00b4 \u141c `)\u0a6d\u0941\u207e\u207e", "(\u0e51\u00b4\u06a1`\u0e51)", "\u0e05\u25cf\u03c9\u25cf\u0e05", "( \uffe3\u25a1\uffe3)/ \u656c\u79ae!! <(\uffe3\u3127\uffe3 ) <(\uffe3\u3127\uffe3 )", "(`\u250c_\u2510\u00b4)", "(\u203e\u25e1\u25dd\u3000)", "( \u25d5\u203f\u203f\u25d5 )", "\u250c|\u25ceo\u25ce|\u2518", "( *\u00af \u00b3\u00af*)\u2661\u3118\u3085", "\u2501(\uff9f\u2200\uff9f)\u2501( \uff9f\u2200)\u2501( \uff9f)\u2501( )\u2501( )\u2501(\uff9f)\u2501(\u2200\uff9f)\u2501(\uff9f\u2200\uff9f)\u2501", "\u30fd(\u30fb\u00d7\u30fb\u00b4)\u309e", "\u03a3(lli\u0434\uff9f\uff89)\uff89", "(\u3064\u0434\u2282)", "(\u256c\uff9f\u0434\uff9f)", "( \u00b4(00)`)", "(\uffe3\u03b5(#\uffe3)\u2606", "(\ub208\u2038\ub208)", "(\u30fb\u2200\u30fb)\u30ce\u4e09G[__]\uff7a", "\u2282(\u00b0\u0414\u00b0\u2282\u2312\uff40\u3064\u2261\u2261\u2261(\u00b4\u2312;;;\u2261\u2261\u2261", "( \uff9f\u2200\uff9f)o\u5f61\uff9f", " |\u0414`)\u30ce\u2312\u25cf\uff5e*", "_(:3 \u300d\u2220 )_", "\u03a3(\uff9f\u03c9\uff9f)", "\u30fe(\u25cf\u309c\u25bd\u309c\u25cf)\u2661 ", "(o\u00b4\u7f52`o)", "\uff08\u00b4-`\uff09.\uff61oO", "(\uff89\u2200`*)", "\u25e2\u2586\u2585\u2584\u2583 \u6eab\u2570(\u301e\ufe36\u301d) \u256f\u99a8 \u2583\u2584\u2585\u2586\u25e3", "\u03a3( \u00b0 \u25b3 \u00b0)", "\u03be( \u273f\uff1e\u25e1\u275b)", "(\u2070\u2296\u2070)", "(#\uff9f\u22bf`)\u51f8", "(\u0e51\u01a1 
\u2083 \u01a1)\u2665", "(\u0301=\u25de\u0c6a\u25df=\u2035)", "(-`\u30a7\u00b4-\u256c)", "\u3069\u3053\u2501\u2501\u2501\u2501(\u309c\u2200\u309c\u4e09\u309c\u2200\u309c)\u2501\u2501\u2501\u2501!!??", "(;\u00b4\u0f0e\u0eb6\u0414\u0f0e\u0eb6`)", "(\uff61>\ufe4f<)\u54c8\u557e", "( \uff9f \u03a7 \uff9f)", "(\u3063\u25cf\u03c9\u25cf)\u3063", "\u0669(\u0e51\u2022\u0300\u03c9\u2022\u0301\u0e51)\u06f6", "il||li _|\uffe3|\u25cb\u30fd(\uff65\u03c9\uff65\uff40)", "(\u00b4\uff21\uff40\u3002)", "(o\uff65e\uff65)", "(\u265b\u203f\u265b)", "\u0285\uff08\u00b4\u25d4\u0c6a\u25d4\uff09\u0283", "m9(\uff3e\u0414\uff3e)\uff8c\uff9f\uff77\uff9e\uff6c\uff70", "\u256e(\u2032\uff5e\u2035\u301e)\u256d", "(\u2604\u25e3\u03c9\u25e2)\u2604", "\u30fd(`\u0414\u00b4)\u30ce", "(\u256c\uff9f \u25e3 \uff9f)", "(\u03c3\uff9f\u2200\uff9f)\u03c3..:*\u2606", "(\u25d4\u0c6a\u25d4)", "( \u00b4\u0414`)y\u2501\uff65~~", "(\uff61\u25d5\u2200\u25d5\uff61)", "\u30fe(\u25ce\u00b4\u30fb\u03c9\u30fb\uff40)\u30ce ", "(\u0e51\u00b4\u3141`)", "(\u00b4\u30fb\u00c5\u30fb`)", "(((\uff9f\u0434\uff9f)))", "(\u2229^o^)\u2283\u2501\u2606\uff9f.*\uff65\uff61", "\u2500=\u2261\u03a3((( \u3064\u2022\u0300\u03c9\u2022\u0301)\u3064", "(\u00b4;\u03c9;`)\u30fe(\uff65\u2200\uff65`)", "\u250f((\uff1d\uffe3(\uff74)\uffe3=))\u251b", "\uff08\u3064> _\u25d5\uff09\u3064\ufe3b\u2566\u0335\u0335\u0347\u033f\u033f\u033f\u033f\u2564\u2500\u2500\u2500", "(\u00b0\uff9b\u00b0\u0665)", "\u03b5\u0669(\u0e51> \u2083 <)\u06f6\u0437", "\u03a3(*\uff9f\u0434\uff9f\uff89)\uff89", "(\uff9f\u0434\uff9f\u2261\uff9f\u0434\uff9f)", "(\uff9f3\uff9f)\uff5e\u266a", "(\u2741\u00b4\u03c9`\u2741)*\u2732\uff9f*", "(\u3063\u00b4\u03c9`c)", "\u2606^(\uff4f\u00b4\u0424\u2207\u0424)o", "\u0505(\u00af\ufe43\u00af\u0505)", "\u10da(\u25c9\u25de\u0c6a\u25df\u25c9 )\u10da", "\u0b87\u0434\u0b87", "(\u3186\u1d17\u3186)", "(*\uff9f\u30fc\uff9f)", "(\u3003\uffe3\u03c9\uffe3)\u4eba(\uffe3\ufe36\uffe3\u3003) ", "\u03c3`\u2200\u00b4)\u03c3", "(\u30fb`\u03c9\u00b4\u30fb)", " 
(//\u00b4\u25dc\u25de\u229c\u25df\u25dd\uff40////) ", "(;\u00b4\u0434\uff40).\uff61\uff4fO(\u30fb\u30fb\u30fb\u30fb)", "(\u0e07\u0e51 \u2022\u0300_\u2022\u0301)\u0e07", "(\uff1b\u00b4\uff9f\u03c9\uff9f\uff40\u4eba)", "\u256e/(\uff1e\u25bd<)\u4eba(>\u25bd\uff1c)\u256d", "(\u256d\uffe33\uffe3)\u256d\u2661 ", "( \uff9f\u0414\uff9f)\u03c3", "_(\u2510\u300c\u03b5:)_", "#\uff9f\u00c5\uff9f\uff09\u2282\u5f61\u2606))\uff9f\u0414\uff9f)\uff65\u2235", "( \u00af\u2022\u03c9\u2022\u00af )", "\u30fe(:3\uff89\uff7c\u30fe)\uff89\uff7c \u4e09[____]", "( \u0c36 3\u0a5c)\uff5e\u2665", " \u10da(\u2022\u03c9 \u2022\u10da)", "\u30fd(\u273f\uff9f\u25bd\uff9f)\u30ce", " (\u00b4\u2229\u03c9\u2229\uff40) ", "(\u30fb\u03b5\u30fb)", "(\u00b4\u25c9\u203f\u25c9\uff40)", "(\u00b4\uff65_\uff65`)", "(\u3065\u2032\u25bd`)\u3065", "\u03b5\u2261\uff8d( \u00b4\u2200`)\uff89", "(\u00b4\u03c3-`)", "(\uff9f\u76bf\uff9f\uff92)", "(^\u03c1^)/", "\uff61\uff65\uff9f\uff65(\uff89\u0414`)\u30fd(\uff9f\u0414\uff9f )\u79c0\u79c0", "( \u0c20\u0d60\u0c20 )\uff89", "_:(\u00b4\u25a1`\u300d \u2220):_", "(\u273a\u03c9\u273a)", "(\u0e51\u2022\u0ac5\u03c9\u2022\u00b4\u0e51)", "(\u0301\uc21c\u25de\u0c6a\u25df\uc21c\u2035) ", "\u03a3(\uff40L_\uff40 )", "(\uff0f\u2035\u0414\u2032)\uff0f~ \u2567\u2567", "(:3[__]4", "(*\u00b4\u0414`)\u3064))\u00b4\u2200`)", "( \u2609_\u2609)\u2261\u261eo\u2500\u2500\u2500\u2500\u2605\u00b0", "\u250f\u251b\u5b78\u6821\u2517\u2513\u03bb\u03bb\u03bb\u03bb\u03bb", "(*\u00b4\u0434`)", "\u03a3 \u03a3 \u03a3 (\u300d\u25cb \u03c9\u25cb )\uff0f", "\u30fe(\u00b4\u03b5`\u30fe)", "0(:3\u3000)\uff5e ('\uff643_\u30fd)_", "=\u2514(\u2510\u534d^o^)\u534d", "(;\uff9f\u0434\uff9f)", "(*\u02c7\u03c9\u02c7*\u4eba)", "( \u2665\u0434\u2665)", "( \u00ba\ufe43\u00ba )", "( \u03a6 \u03c9 \u03a6 )", "\u0c25\u0c6a\u0c25", "(\uff9f\u2200\uff9f)", "|\u67f1|\u03c9\u30fb`)", "( \u00b4-\u03c9 \uff65)\u2584\ufe3b\u253b\u2533\u2550\u2550\u2501", "\uff3c(\u25cfo\u25cb;)\u30ce", "d(d\uff07\u2200\uff07)", "( 
\u00b4\uff65\u03c9)", "(\u2202\u03c9\u2202)", "|////|\u3000( \u3000)\uff89\u3000|////|(\u81ea\u52d5\u9580", "\u3002\u309c+.(\u4eba-\u03c9\u25d5)\u309c+.\u309c", "_(\u221a \u03b6 \u03b5:)_", "\u0f3c \u3064/\u0335\u0347\u033f\u033f/\u2019\u033f\u2019\u033f \u033f \u033f\u033f \u033f\u033f\u25d5 _\u25d5 \u0f3d\u3064/\u0335\u0347\u033f\u033f/\u2019\u033f\u2019\u033f \u033f \u033f\u033f \u033f\u033f \u033f\u033f", "(`\u0434\u00b4)", "(\u3000\u25dc\u25e1\u203e)", " (:3\u3063)\u3078 \u30fd(\u00b4\u0414\uff40\u25cf\u30fd)", "(#\u2035)3\u2032)\u2582\u2582\u2582\u2583\u2584\u2585\uff5e\uff5e\uff5e\u55e1\u55e1\u55e1\u55e1\u55e1", "\u2606\u2312(*^-\u309c)v", "(\u00b4\u2256\u25de\u0c6a\u25df\u2256)", "( \u02d8\u2022\u03c9\u2022\u02d8 )\u25de\u207d\u02d9\u00b3\u02d9\u207e", "( \u00d7\u03c9\u00d7 )", "(#`\u0414\u00b4)\uff89", "-//(\u01d2.\u01d2)//-", "(\u0301\u25c9\u25de\u0c6a\u25df\u25c9\u2035)", "\u30fe(\uff1b\uff9f(OO)\uff9f)\uff89", "\u2299\u8c37\u2299", "\u10da(\uff9f\u0434\uff9f\u10da)", "\u2665(\u00b4\u2200` )\u4eba", "( \uffe3 3\uffe3)y\u2582\u03be", "\uff61\uff9f(\uff9f\u00b4\u03c9`\uff9f)\uff9f\uff61", "\u2267\u3014\u309c\u309c\u3015\u2266", "=\u035f\u035f\u035e\u035e( \u2022\u0300\u0434\u2022\u0301)", "(\u2718\ufe4f\u2718\u10d0)", "(`3\u00b4)", "(\u3000\u02d9\u706c\u02d9\u3000)", "(\uff0a\u309c\u30fc\u309c)b", "(\u3063\ufe4f-) .\uff61o", "\u256f-____-)\u256f~\u2550\u2569\u2550\u2550\u2550\u2550\u2569\u2550", "(\u706c\u00ba\u03c9\u00ba\u706c)", "\u256e(\u256f_\u2570)\u256d", "\u03a3\u30fd(\uff9f\u0414 \uff9f; )\uff89", 
"(\u3000\u3063\u0e18\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e34\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u0e4c\u10da)", "(\uff61\u00b4\u2200\uff40)\uff89\u2015\u2282ZZZ\u2283", "\u03a3>\u2015(\u3003\u00b0\u03c9\u00b0\u3003)\u2661\u2192", "(\u256f\u00b0O\u00b0)\u256f\u253b\u2501\u253b", "\u03c3 \uff9f\u2200 \uff9f) \uff9f\u2200\uff9f)\u03c3 ", "(\u00b4-\u03b9_-\uff40)", "*\u0b18(\u0a6d*\u02ca\u1d55\u02cb)\u0a6d* \u0a48\u2729\u2027\u208a\u02da", "(*\u00b4\u2200`)~\u2665", "(,,\u30fb\u03c9\u30fb,,)", "( ^\u03c9^)", "\u250f(_\u0434_\u250f)\u2513))", "(=\u00b4\u03c9`=)", "\u30fe(;\uff9f;\u0414;\uff9f;)\uff89\uff9e", "(\u0434) \uff9f\uff9f", "(=\uff9f\u0414\uff9f=) \u2584\ufe3b\u253b\u2533\u2501 \u00b7.`.`.`.", "( \u02d8\uff65\u0437\uff65)", "(\u256f\u00b0\u25bd\u00b0)\u256f \u253b\u2501\u253b", "(\u272a\u03c9\u272a)", "\uff3c(\u25cf\u00b4\u03d6`\u25cf)\uff0f", "\u30fd(\u25cf\u00b4\u03b5\uff40\u25cf)\u30ce", "(\u203a\u00b4\u03c9`\u2039 )", "(#`\u76bf\u00b4)", "\u25d1\u2582\u25d0", "(\u00b4///\u2601///`)", "(\uff61\u014f_\u014f)", "(\uff89>\u03c9<)\uff89", "(\u3000\uff9f\u2200\uff9f) \uff89\u2661", "\u2211(\uffe3\u25a1\uffe3;)", "(\u309d\u2200\uff65)b", "(\u0e51\u2022 . 
\u2022\u0e51)", "(\u00b4;;\u25c9;\u2296;\u25c9;;\uff40)", "(\uff61\u2022\u3145\u2022\uff61)\u2661", "(\u3145\u02d8\u3142\u02d8)", "\u0669(\u0e51\u00b43\uff40\u0e51)\u06f6", "(\u00b4\u0398\u03c9\u0398`)", ",,\u053e\u2038\u053e,,", "\u03c3(\u00b4\u2200\uff40*)", "(\u00b4\u25bd`\u0283\u2661\u01aa)", "\u250c(\u250c^o^)\u2510", "\u207d\u207d \u25df(\u2217 \u02ca\u03c9\u02cb \u2217)\u25de \u207e\u207e", "(\u25cd\u2022\u1d17\u2022\u25cd)\u309d", "(*\u2019\uff70\u2019*)", "( \u03c3\u055e\u0a0a \u055e)\u03c3 ", "( \u02d8\u2022\u03c9\u2022\u02d8 ).oO\u0b87", "(\u25cf\uff40 \u8278 \u00b4)", "\u22c9(\u25cf \u2238 \u25cf)\u22ca", "( \u00b4\u2022\u0325\u0325\u0325\u03c9\u2022\u0325\u0325\u0325` )", "(\u25cf\u00b4\u03c9\uff40\u25cf)\u309e", "(\uff4f\uff9f\uff9b\uff9f)\u250c\u251b\u03a3(\uff89\u00b4*\u03c9*`)\uff89", "|\u67f1|\u03c9\uff9f)---c<` \u0434\uff9f)!", "(\u0e51\u00b4\u3142`\u0e51)", "(\uff9f\u3005\uff61)", "\u30fd(\u0301\u25d5\u25de\u0c6a\u25df\u25d5\u2035)\uff89"]
--------------------------------------------------------------------------------
/jseg/Emodet/emodet.py:
--------------------------------------------------------------------------------
1 | # -*-coding:utf-8-*-
2 | from os.path import abspath, dirname, join
3 | import urllib2
4 | import re
5 | import time
6 | import json
7 |
8 | CUR_PATH = dirname(abspath(__file__))
9 |
10 |
def craw_emo(save=True):
    """Crawl emoticons from facemood.grtimed.com.

    Category ids 1..199 are requested in turn.  When the first page of a
    category yields matches, follow-up pages (``&page=2``, ``&page=3``, ...)
    are fetched until one of them yields nothing.  The collected strings are
    de-duplicated at the end.

    :param save: when true, dump the list as JSON into ``emodata.json``
        (relative to the current working directory) and return ``None``;
        otherwise return the list of emoticons.
    """
    base = 'http://facemood.grtimed.com/index.php?view=facemood&tid=%d'
    pat = re.compile(u'''flashvars="str=(.*?)"''')

    def grab(address):
        # Download one page and pull every emoticon out of its markup.
        return pat.findall(urllib2.urlopen(address).read().decode('utf-8'))

    emos = []
    for tid in range(1, 200):
        print(tid)
        url = base % tid
        first_page = grab(url)
        if not first_page:
            # Nothing under this id: move on without pausing.
            continue
        emos += first_page
        page = 2
        while True:
            url2 = url + '&page=%d' % page
            print(url2)
            more = grab(url2)
            if not more:
                break
            emos += more
            page += 1
            time.sleep(0.5)  # be gentle with the server between pages
        # NOTE(review): the indentation of this pause was reconstructed; it is
        # assumed to apply only to ids that produced matches -- confirm.
        time.sleep(0.5)
    print('%d emos collected' % len(emos))
    emos = list(set(emos))
    if not save:
        return emos
    with open('emodata.json', 'w') as jf:
        json.dump(emos, jf)
    print('emoticons saved!')
48 |
49 |
def load_emo(path=None):
    """Load the emoticon lexicon.

    The lexicon file holds one emoticon per line and is read as UTF-8.  A
    small list of extra hand-picked emoticons is appended, every entry is
    stripped of surrounding whitespace, duplicates are removed and empty
    entries are dropped.  (An empty string is a substring of every text, so
    keeping one would make every lookup report a match.)

    An earlier variant, left commented out in the original code, merged
    ``emodata.json``, ``emo2.json`` and ``emo3.json`` instead of reading
    ``emoall.txt``.

    :param path: lexicon file to read; defaults to ``emoall.txt`` located
        next to this module.
    :returns: list of unique, non-empty unicode emoticons (order unspecified).
    """
    # Local import keeps this block self-contained; io.open decodes the file
    # the same way on Python 2 and Python 3.
    import io

    if path is None:
        path = join(CUR_PATH, 'emoall.txt')
    with io.open(path, encoding='utf-8') as f:
        emos = f.read().split('\n')
    emo_ext = [u'= =', u'=.=', u'=_=', u'>///<', u'> <', u'orz',
               u'^ ^', u'XD', u'(′・_・`)', u'-_-||', u'>"<', u'T.T']
    emos += emo_ext
    # Strip before de-duplicating so that 'x' and 'x ' collapse into one entry.
    unique = set(i.strip() for i in emos)
    unique.discard(u'')
    return list(unique)
71 |
72 |
def ensure_unicode(string):
    """Return ``string`` as unicode text.

    Text that is already unicode is returned untouched; anything else is
    decoded as UTF-8.

    :param string: unicode text or a UTF-8 encoded byte string.
    :returns: the unicode text.
    :raises UnicodeError: if the input cannot be decoded as UTF-8 (including
        objects that have no ``decode`` method).
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3: str is the unicode text type
    if isinstance(string, text_type):
        return string
    try:
        return string.decode('utf-8')
    except (AttributeError, TypeError, ValueError):
        # UnicodeDecodeError is a ValueError; AttributeError/TypeError cover
        # inputs that are not byte strings at all.
        raise UnicodeError('Input should be UTF8 or UNICODE')
80 |
# Emoticon lexicon, loaded once when the module is imported; find_emo()
# scans this list.
emos = load_emo()
82 |
83 |
def find_emo(txt, source=False):
    """Find the known emoticons contained in ``txt``.

    Every entry of the module-level ``emos`` lexicon is tested with a plain
    substring check against the unicode form of ``txt``.

    :param txt: unicode text or a UTF-8 encoded byte string.
    :param source: only the literal ``True`` switches the return value to a
        ``(matches, text)`` tuple.
    :returns: the list of matching emoticons, or ``(matches, text)`` when
        ``source is True``.
    """
    txt = ensure_unicode(txt)
    res = []
    for emo in emos:
        if emo in txt:
            res.append(emo)
    if source is not True:
        return res
    for hit in res:
        # Each hit is wrapped in empty strings, so the text comes back
        # effectively unchanged.
        txt = txt.replace(hit, '' + hit + '')
    return (res, txt)
93 |
94 | # source
95 | # http://facemood.grtimed.com/ (顏文字卡) --> emodata.json
96 | # http://news.gamme.com.tw/55689 (宅宅新聞) --> emo2.json
97 | # http://www.ptt.cc/bbs/cksh77th17/M.1158075878.A.9D2.html (PTT) --> emo3.json
98 | # http://j831124g2009.pixnet.net/blog/post/17973045-素材│日本顏文字(可愛)(*☉౪⊙*)
99 | # (痞客邦) --> emo4.json
100 |
101 | # problems
102 | # 兩行以上的emoticons怎麼辦?
103 | # ▲ ▲ ▲
104 | # ﹏﹏(ㄏ ̄▽ ̄)ㄏ q(〒□〒)p ㄟ( ̄▽ ̄ㄟ)﹏﹏ 通通變成阿飄
105 |
--------------------------------------------------------------------------------
/jseg/Emodet/emodet2.py:
--------------------------------------------------------------------------------
1 | #-*-coding:utf-8-*-
2 | import re
3 |
# Regex metacharacters.  Kept as a module-level name for backward
# compatibility; the escaping below no longer iterates over it.
metachr = '^$*+?{}[]\\|()'

# ``heads`` is interpolated into a character class by find_emo() as the set
# of characters an emoticon candidate may start with.  ``tails`` is
# presumably the matching set of final characters (not used in this module).
heads = u'''>ζↁ━இㅇ(┌/∑┐ψ〒ʕ└⊙<√=☜ㄟ〠Σ•ತ✧╰(*-/☮Oε┴‷〈ಸ爻<︿乁♡bˋ⁄ˇఠ◉ಠ○vm#◎⇎Ψmb◔♀ⶸლ\\。 ̄◢づ‧≧흫…╭o╮ჰ≡癶凸~ヽヾ'''
tails = u'''ↁノ「↑̖〠ิ▽♀⁄ˇ◣~⑤╮凵︿・|━)dʔಠ√ತ'+/°3ืಸ;?ξツఠm﹏◎"⇎。≡╩╭oჴஇ☆┌з┐〒↗⊙☮☞•…*.┴:\\︴♡σ♂d○●Z ̄b≦♪╬╰癶>シ~#〉3↘┛”;!✧)Ψ-ⶸ=_ㅇψˊノ◔ლaづ╧흫m╯y凸'''
# Escape in a single pass.  The previous character-by-character loop escaped
# the backslash *after* the other metacharacters (doubling the backslashes
# it had just inserted) and never escaped '-', so inside a character class
# '*-/' became a range that also matched '+', ',' and '.'.
heads = re.escape(heads)
tails = re.escape(tails)
11 |
12 |
def ensure_unicode(string):
    """Return *string* as unicode text, decoding UTF-8 bytes when needed.

    Args:
        string: unicode text, or a byte string encoded as UTF-8.

    Returns:
        The input unchanged if it is already text, otherwise the decoded
        text.

    Raises:
        UnicodeError: if the input is not text and cannot be decoded as
            UTF-8 (including objects that have no ``decode`` method).
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3: ``str`` is already unicode text
    if isinstance(string, text_type):
        return string
    try:
        return string.decode('utf-8')
    except (UnicodeError, AttributeError):
        # AttributeError covers non-string inputs, which the original
        # catch-all also reported as UnicodeError.
        raise UnicodeError('Input should be UTF8 or UNICODE')
20 |
def clear_num(txt):
    """Remove number-like runs from *txt*.

    Every run of two or more characters drawn from the digits and
    ``- . : /`` is deleted (this covers times, dates, decimals, and also
    runs of dots such as ``...``).

    Args:
        txt: unicode text.

    Returns:
        The text with those runs removed.
    """
    # The backslash is doubled so the literal contains a valid ``\-`` regex
    # escape without relying on an invalid string escape sequence.
    num_pat = re.compile(u'([0-9\\-.:/]{2,})')
    return num_pat.sub('', txt)
25 |
def post_check(candlst):
    """Filter raw emoticon candidates and strip Chinese text from them.

    Each candidate is repeatedly stripped of surrounding whitespace, of
    parenthesised Chinese runs such as ``(笑)`` and of any remaining Chinese
    characters until it stops changing.  A candidate is discarded as soon
    as it has fewer than three distinct characters or starts with ``m:`` or
    ``~`` followed by an alphanumeric or Chinese character.

    Args:
        candlst: iterable of unicode candidate strings.

    Returns:
        list: the cleaned candidates that survived, in input order.
    """
    # Compiled once per call instead of once per loop iteration.  The
    # backslashes are doubled to avoid invalid string escape sequences.
    paren_cjk = re.compile(u'\\([一-龥]{1,}[\\)]{0,1}')
    cjk_run = re.compile(u'([一-龥]{1,})')
    label_m = re.compile(u'^m:[0-9a-zA-Z一-龥]')  # e.g. "m:大大"
    label_tilde = re.compile(u'^~[0-9a-zA-Z一-龥]')  # e.g. "~4次"

    con = []
    for cand in candlst:
        while True:
            cleaned = cand.strip()
            cleaned = paren_cjk.sub('', cleaned)
            cleaned = cjk_run.sub('', cleaned)
            if len(set(cleaned)) < 3:
                break  # too few distinct characters to be an emoticon
            if label_m.search(cleaned) or label_tilde.search(cleaned):
                break
            if cleaned == cand:
                # Fixed point reached: nothing left to strip.
                con.append(cleaned)
                break
            cand = cleaned
    return con
47 |
def exclude_cht(txt):
    """Remove runs of three or more Chinese characters from *txt*.

    A run is only removed when the rest of its line contains no two
    consecutive characters from ``[.a-zA-Z]`` (negative look-ahead in the
    pattern).  The substitution is repeated until the text stops changing.

    Args:
        txt: unicode text.

    Returns:
        The text with the qualifying Chinese runs removed.
    """
    pat = re.compile(u'''(?!.*[.a-zA-Z][.a-zA-Z])([一-龥][一-龥]{2,})''')
    while True:
        res = pat.sub('', txt)
        if res == txt:
            return res
        # The loop already iterates to a fixed point; the former recursive
        # call here only repeated the work and threw its result away.
        txt = res
57 |
def find_emo(txt, source=False):
    """Detect emoticon-like substrings in *txt* with a heuristic pattern.

    The text is first cleaned with ``clear_num`` and ``exclude_cht``; every
    match that starts with a character from ``heads`` and is followed by at
    least three more characters is then filtered through ``post_check``.
    Only candidates that still occur verbatim in the original text are
    returned.

    Args:
        txt: unicode text or UTF-8 encoded bytes.
        source: when equal to ``True``, also return the text after each hit
            has been replaced by itself wrapped in two empty strings (the
            wrapper strings are empty, so the text is unchanged).

    Returns:
        A list of candidates, or ``(list, text)`` when *source* is true.
    """
    original = ensure_unicode(txt)
    cleaned = exclude_cht(clear_num(original))
    pattern = u'''(?!.*[.a-zA-Z][.a-zA-Z])([%s].{3,})''' % heads
    candidates = post_check(re.compile(pattern).findall(cleaned))
    res = [cand for cand in candidates if cand in original]
    if not (source == True):  # noqa: E712 - keep the equality semantics
        return res
    for hit in res:
        original = original.replace(hit, ''+hit+'')
    return (res, original)
71 |
72 |
--------------------------------------------------------------------------------
/jseg/__init__.py:
--------------------------------------------------------------------------------
1 | from .jieba import Jieba # noqa
2 | __version__ = '0.0.4'
3 |
--------------------------------------------------------------------------------
/jseg/brill_tagger.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amigcamel/Jseg/1b8f6aa5e65cfa770cb7e81df0a4fbc15a7bcd35/jseg/brill_tagger.pkl
--------------------------------------------------------------------------------
/jseg/emoall.txt:
--------------------------------------------------------------------------------
1 | (メ゚Д゚)メ
2 | (〃^◇^〃)
3 | Zz(´-ω-`*)
4 | (꒪ͦᴗ̵̍꒪ͦ )
5 | (`へ´≠)
6 | (♡˙︶˙♡)
7 | |Φ|<|′|◇|‵|>|Φ|
8 | (( へ(へ´∀`)へ
9 | ╮(﹋﹏﹌)╭
10 | (ó㉨ò)
11 | (́^◞౪◟<‵)
12 | ( ´∀`);y=ー(゚д゚)・∵. ターン
13 | (*˘︶˘*).。.:*♡
14 | (-◎◇◎-)o
15 | ゚Å゚)
16 | 爻((╬ಠิ益ಠิ))イカリズムッ!!
17 | ヽ( ^∀^)ノ
18 | (́提◞౪◟供‵)
19 | ╮(′ω‵)╭
20 | -@{}@{}-υ( ̄▽ ̄*)
21 | (((o(*゚▽゚*)o)))
22 | ╮(╯∀╰)╭
23 | 。・゚・(つд`゚)・゚・
24 | (^_っ^)
25 | σ >︿<ο
26 | ( ◞≼☸≽◟ ._ゝ◞≼☸≽◟)
27 | ( 3ω3)
28 | (「・ω・)「
29 | ( *´◒`*)
30 | (^-^*)
31 | ( • ̀ω•́ )
32 | (́✺◞౪◟✺‵)
33 | d(`・∀・)b
34 | (*゚∀゚*)
35 | o(‵▽′)o
36 | ⶸ(ᖛ Δ ᖜ)ⶸ
37 | ( ´゚,_」゚)バカジャネーノ
38 | (。・㉨・。)
39 | ╠╦╩╦╦╩╣◇‧)
40 | ☻
41 | (´・ω・)つ旦
42 | ╮(▔▽▔")╭
43 | ┐(´д`)┌
44 | (゚д⊙)
45 | _(´ཀ`」 ∠)_
46 | /(* ̄△ ̄)\
47 | (╬☉д⊙)
48 | ヾ(。 ̄□ ̄)ツ
49 | Σ(;゚д゚)
50 | ( ̄(エ) ̄)
51 | ^(*)^
52 | (´・ω・`)
53 | (=˙~˙=)
54 | ⊙▃⊙
55 | 。゚ヽ(゚´Д`)ノ゚。
56 | ∑(ι´Дン)ノ
57 | ♡(*´∀`*)人(*´∀`*)♡
58 | <( ̄︶ ̄)>
59 | -`д´-
60 | (◞≼◉ื≽◟;益;◞≼◉ื ≽◟)
61 | (T◇T)
62 | (ゝ∀・)
63 | (#╯ ̄□ ̄)╯~┴─┴
64 | ε=ε=ε=ε=ε=ε=╭( ̄▽ ̄)╯
65 | (゜皿。)
66 | (〒︿〒)
67 | 凸(= _=)凸
68 | ლ(⁰⊖⁰ლ)
69 | (-_____-x)
70 | (ᇂдᇂ )
71 | (*≥▽≤)ツ┏━┓
72 | (́ತ◞౪◟ತ‵)
73 | ლ|^Д^ლ|
74 | (#‵□′)╯
75 | ︿(′□‵)︿
76 | ( ˘•ω•˘ )
77 | (́墨◞౪◟鏡‵)
78 | (శωశ)
79 | ┌|◎o◎|┘
80 | ・゜・(PД`q。)・゜・
81 | (╬゚д゚)
82 | ╮(╯╴╰∥)╭
83 | (눈‸눈)
84 | ◑ω◐
85 | =.=
86 | ☜(:♛ฺ;益;♛ฺ;)☞
87 | (*‵◇′*)
88 | (<ゝω・) 綺羅星☆
89 | (°Д°)
90 | ヾ(´︶`*)ノ♬
91 | ┏( .-. ┏ ) ┓
92 | (´c_`)
93 | ⊂彡☆))д`)
94 | ( ̄▽ ̄)
95 | (┛`д´)┛
96 | q(′□‵)p
97 | //( ◕‿‿◕ )\
98 | (๑・ิ⋖⋗・ิ๑)
99 | ლ(╹ε╹ლ)
100 | (o`・ω・)ノ(ノД`)
101 | ᕕ ( ᐛ ) ᕗ
102 | ჰჰჰ❛‿❛ჴჴჴ
103 | ρ(・ω・、)
104 | (。-_-。)
105 | (´・_・`)
106 | ლ(́◕◞౪◟◕‵ლ)
107 | = =
108 | (๑•́ ₃ •̀๑)
109 | \\\\٩( 'ω' )و ////
110 | ( ´゚Д゚`)
111 | ( ‘д‘⊂彡☆))Д´)
112 | (;_;)
113 | ლ(´•д• ̀ლ
114 | Σ(゚д゚)
115 | <*)) >>=<
116 | (•ㅂ•)/
117 | ヾ(*ΦωΦ)ツ
118 | ( ゚∀゚)つ≡≡≡♡♡♡)`ν゜)グシャ
119 | \(′□‵)/
120 | ε=ε=ヾ(;゚д゚)/
121 | _(┐「﹃゚。)_
122 | ㄟ( ̄▽ ̄ㄟ)﹏
123 | ~(╯▽╰)~
124 | o(☆Ф∇Ф☆)o
125 | (*´ω`)人(´ω`*)
126 | ┳━┳ノ( OωOノ)
127 | ~(~o ̄▽ ̄)~o.....o~(_△_o~)~..
128 | (゚⊿゚)
129 | (〒︵〒)
130 | (:◎)≡
131 | 。゚・(ノД`)人(´Д`)人(Д` )・゚。
132 | ┳━┳ノ( ' - 'ノ)
133 | (#゚Д゚)
134 | ლ(・∀・ )ლ
135 | (́≖◞౪◟≖‵)/
136 | (´~`)
137 | (¦3[▓▓]
138 | (╯≧▽≦)╯ ~╩═══╩
139 | (❛◡❛✿)
140 | 乁( ◔ ౪◔)「
141 | (ー` )
142 | ( ′-`)y-~
143 | (´-ω-`)
144 | (σ´□`)σ
145 | ᕦ(ò_óˇ)ᕤ
146 | ₍₍◝(・'ω'・)◟⁾⁾
147 | ◢▆▅▄▃ 溫╰(〞︶〝) ╯馨 ▃▄▅▆◣
148 | ( ºΔº )
149 | 흫_흫
150 | (°c_,°`)
151 | (╬♛. 益♛. )
152 | (́✸◞♉◟✸‵)
153 | (=´ᴥ`)
154 | ఠ_ఠ
155 | : ♡。゚.(*♡´◡` 人´◡` ♡*)゚♡ °・
156 | (╬ಠ益ಠ)ゴルァ!!
157 | ━(゚∀゚)━( ゚∀)━( ゚)━( )━( )━(゚)━(∀゚)━(゚∀゚)━
158 | (◔⊖◔)つ
159 | (,,゚Д゚)
160 | ⁄(⁄ ⁄•⁄ω⁄•⁄ ⁄)⁄
161 | ಸ_ಸ
162 | (^ω^)
163 | ლ(・´ェ`・ლ)
164 | (⁰▿⁰)
165 | (メ3[____]
166 | ﹏﹏(ㄏ ̄▽ ̄)ㄏ
167 | ლ(́ಢ.◞౪◟ಢ‵ლ)
168 | ヽ(゜▽゜ )-C<(/;◇;)/~[拖走]
169 | (´◓Д◔`)
170 | (≧∀≦)ゞ
171 | (´д((☆ミPia!⊂▼(o ‵-′o)▼つPia!彡★))∀`)
172 | ˋ(′~‵")ˊ
173 | (o ̄∇ ̄)=◯)`ν゜)・;'
174 | ✧◝(⁰▿⁰)◜✧
175 | <(_ _)>
176 | (。A。)
177 | (′Q‵)╭~
178 | (๑•̀ㅂ•́)و✧
179 | (☞゚∀゚)゚∀゚)☞
180 | (´≝◞⊖◟≝`)
181 | (●б人<●)
182 | ((└(:3」┌)┘))
183 | ( •́ _ •̀)?
184 | (́◕///◞౪◟///◕‵)/
185 | (๑¯∀¯๑)
186 | (◞‸◟)
187 | (* ̄◇ ̄)╯
188 | ヽ(㊤V㊤*)ノ
189 | \(" ̄□ ̄)/
190 | (′・_・`)
191 | Σ(°Д°;
192 | <( ̄>< ̄)>
193 | (ㄨ‵◇′)凸
194 | ◥(ฅº₩ºฅ)◤
195 | (‘⊙д-)
196 | ლ( • ̀ω•́ )っ
197 | ( ̄╔◇╗ ̄)
198 | ㄟ( ̄▽ ̄ㄟ)﹏﹏
199 | (́۞◞౪◟۞‵)
200 | (@◇@)
201 | ( ̄(∞) ̄)y-~
202 | (  ̄ c ̄)y▂ξ
203 | (ÒωÓױ)
204 | (@^◇^@)
205 | (」・ω・)」うー!(/・ω・)/にゃー!
206 | (﹋﹏﹌)
207 | (|||゚д゚)
208 | (Ò㉨Óױ)
209 | ( ̄﹏ ̄)
210 | (▰˘◡˘▰)
211 | (╯ᕡὤᕡ)╯
212 | \⊙▽⊙/
213 | ƪ(•̃͡ε•̃͡)∫
214 | ( ´∀`)σ
215 | (’へ’)
216 | (╯°Д°)╯ ┻━┻
217 | (((┏(; ̄▽ ̄)┛
218 | ԅ(¯﹃¯ԅ)
219 | (゚∀。)
220 | 州‧◇‧州
221 | ヽ(∀゚ )人(゚∀゚)人( ゚∀)人(∀゚ )人(゚∀゚)人( ゚∀)ノ
222 | ( ゚Д゚)σ
223 | ( ゚Д゚)ノ⌒⑤
224 | ヽ( ° ▽°)ノ
225 | /(′˙‵)\
226 | (●⁰౪⁰●)
227 | (❍ᴥ❍ʋ)
228 | ‷̗ↂ凸ↂ‴̖
229 | ╰( ̄▽ ̄)╯
230 | ..._〆(°▽°*)
231 | (゚∀゚ )
232 | ╮(〒﹏〒)╭
233 | ︿(@ ̄︶ ̄@)︿
234 | (΄ಢ◞౪◟ಢ‵)◉◞౪◟◉)
235 | (メ ゚皿゚)メ
236 | (*´з`*)
237 | ε≡(ノ´_ゝ`)ノ
238 | (〃∀〃)
239 | ~( ̄▽ ̄)~(_△_)~( ̄▽ ̄)~(_△_)~( ̄▽ ̄)~
240 | ಠ_ಠ
241 | (・ิω・ิ)
242 | ヽ(=^・ω・^=)丿
243 | (。í _ ì。)
244 | ˋ( ° ▽﹑°|||| )
245 | (́థ◞..◟థ‵)
246 | ( ´∀`)つt[ ]
247 | ( ̄ε(# ̄)☆
248 | (‵□′)--●(#) ̄(00) ̄)
249 | ( ´・◡・`)
250 | (:3[___]=
251 | (`_ゝ´)
252 | (。・ω・。)
253 | (´◔∀◔`)
254 | (ლ ^ิ౪^ิ)ლ
255 | ﹏﹏(ㄏ ̄▽ ̄)ㄏ (一(oo)一)* ㄟ( ̄▽ ̄ㄟ)﹏
256 | (•‾⌣‾•)
257 | ....ˊˋ------。:.゚_ヽ(_´∀`_)ノ_.:。((浮水
258 | ・*・:≡( ε:)
259 | 魔貫光殺砲(゚Д゚)σ━00000000000━●
260 | ╰(‵□′#)
261 | (= ̄◇ ̄=)V
262 | ( ´・・)ノ(._.`)
263 | (ノ∀` )
264 | ヽ(#`Д´)ノ
265 | (゚Д゚≡゚Д゚)?
266 | ┌|≧◇≦|┘做└|≧◇≦|┐體┌|≧◇≦|┘操
267 | (#▔︵▔)=====●( ̄ ⊿" ̄)╭
268 | (#‵▽′)/~~~~~~~~~(☆___☆)
269 | _ノ乙(、ン、)_
270 | (´-ωก`)
271 | (*^ ◇ ^*)
272 | ( ᐛ)パァ
273 | ლ(・ ิω・ิლ)
274 | (́>◞౪◟<‵)ノシ
275 | (*‘ v`*)
276 | ( ¯﹁¯ )
277 | (ヾノ・ω・`)
278 | (゚ω´)
279 | (σ回ω・)σ←↑→↓←↑
280 | キタ━━━━(゚∀゚)━━━━!!
281 | _(:3 」∠ )_
282 | (ノ◕ヮ◕)ノ*:・゚✧
283 | o(@//////@)o
284 | _(¦3」∠)_
285 | öㅅö
286 | (゚ロ ゚ノ)ノ
287 | (╯-_-)╯╧╧
288 | (一(oo)一)*
289 | (●▼●;)
290 | (╬゚д゚)▄︻┻┳═一
291 | (^u^)
292 | (`・ω・´)
293 | ( ′∀`)σ≡σ☆))Д′)
294 | ( ~'ω')~
295 | :;(∩´﹏`∩);:
296 | (≖ᴗ≖๑)
297 | (ㄏ ̄▽ ̄)ㄏ ㄟ( ̄▽ ̄ㄟ)
298 | <Φ﹏Φ>
299 | (ノ`⊿´)ノ
300 | (ノ・_・)ノ凹 ┣凹━凹━凹┫
301 | ╮(╯3╰)╭ ╮(╯_╰)╭ ╮(╯▽╰)╭ ╮( ̄▽ ̄)╭
302 | (・ω´・ )
303 | ヽ( ´ー)ノ
304 | Φ౪Φ
305 | (*ω*)
306 | ╭(╭(╭(ㄏ ̄□ ̄)ㄏ
307 | ฅ(๑*д*๑)ฅ!!
308 | ╮(╯▽╰")╭☆
309 | ╭(╭(╭ (ㄏ ̄△ ̄)ㄏ
310 | (́⊙◞౪◟⊙‵)
311 | (灬ºωº灬)
312 | -_-||
313 | Σ(゚Д゚;≡;゚д゚)
314 | (΄◉◞౪◟◉‵)
315 | ლ(`∀´ლ)
316 | (○‵△′)=○○○○( ̄#)3 ̄)
317 | (́。◞౪◟。‵)
318 | (○^~^○)
319 | ٩(。・ω・。)و
320 | (“ ̄▽ ̄)-o█ █o-( ̄▽ ̄”)/
321 | ┬┴┬┴┤(‧◇├┬┴┬
322 | d(・ω・d) 微分!(∫・ω・)∫ 積分!∂(・ω・∂) 偏微分!(∮・ω・)∮ 沿閉曲線的積分!(∬・ω・)∬ 重積分!∇(・ω・∇)梯度!∇・(・ω・∇・)散度!∇×(・ω・∇×)旋度!Δ(・ω・Δ)拉普拉斯!
323 | ╰(‵△′)╯
324 | (ಠ益ಠ)
325 | (´,_ゝ`)
326 | (・∀・)っ/凵
327 | (((゚Д゚;)))
328 | (╥﹏╥)
329 | (́〠◞౪◟〠‵)
330 | Ψ(≧◇≦)Ψ
331 | ( ≧◇≦ )
332 | (/ ↀ3ↀ)/q(﹌ω﹌ )
333 | ლ(╹◡╹ლ)
334 | └( ̄^ ̄ )┐
335 | (o´・ω・`)σ)Д`)
336 | (;д;)
337 | _(:3 ⌒゙)_
338 | ( ̄◇ ̄)凸
339 | XD
340 | ╮(╯﹏╰)╭
341 | (^y^)
342 | (╯ ‵Д′)╯╧───╧
343 | (σ′▽‵)′▽‵)σ
344 | ಠ益ಠ
345 | (´,,•ω•,,)♡
346 | (′□‵)╭~
347 | (/Д`)~゚。
348 | (o・e・)
349 | ( ╬◣ 益◢)y
350 | ∥∥<∥′ ◇‵ || >∥∥
351 | ≧▽≦
352 | ~( ̄0 ̄)~
353 | (´;ω;`)
354 | ( っค้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้้c )
355 | ξ( ✿>◡❛)▄︻▇▇〓▄︻┻┳═一
356 | (´⊙ω⊙`)
357 | (≖_≖)✧
358 | ||Φ|(|´|Д|`|)|Φ||
359 | (∩^o^)⊃━☆゚.*・。
360 | o(‧/////‧)o
361 | (=´ω`=)
362 | Σ>―(〃°ω°〃)♡→
363 | (╯‵□′)╯︵┴─┴
364 | (*´・д・)?
365 | _(°ω°」 ∠)
366 | (・∀・)つ⑩
367 | (-ι_- )
368 | (́▣◞౪◟▣‵)
369 | ┬─┬ ノ( ' - 'ノ)
370 | ٩(ŏ﹏ŏ、)۶
371 | (# ̄▽ ̄)o╭╯ ☆╰╮o( ̄▽ ̄///)<
372 | ( శ 3ੜ)~♥
373 | (づ′▽`)づ
374 | ´-ω-)b
375 | o(*/////*)o
376 | \(≧ω≦)/
377 | (☉д⊙)
378 | (′ω‵)y-~
379 | (ΦωΦ)
380 | (́☜◞౪◟☞`)/
381 | (〒△〒)
382 | ξ( ✿>◡❛)
383 | (´◔ ₃ ◔`)
384 | (o ─_─)==O)。O。)
385 | (Π△Π)
386 | (=m=)
387 | o(⊙/////⊙)o
388 | (´_ゝ`)
389 | (°ཀ°)
390 | ๛ก(ー̀ωー́ก)
391 | (→ܫ←)
392 | ( ╯' - ')╯ ┻━┻
393 | (´◉㉨◉)
394 | (=゚Д゚=) ▄︻┻┳━ ·.`.`.`.
395 | (っ・Д・)っ
396 | 。:.゚ヽ(*´∀`)ノ゚.:。
397 | m(。≧エ≦。)m
398 | ( メ∀・)
399 | ( ・・)つ―{}@{}@{}-
400 | (◕ܫ◕)
401 | ( ͡° ͜ʖ ͡°)
402 | (〒◇〒)o~
403 | ヽ(́◕◞౪◟◕‵)ノ
404 | (◜◔。◔◝)
405 | ( ¤̴̶̷̤́ ‧̫̮ ¤̴̶̷̤̀ )
406 | (́◉◞౪◉*◉◉◞◕౪౪◟◕◉◉φ◉౪◞◉*‵)
407 | (*´д`)~♥
408 | Ỏ̷͖͈̞̩͎̻̫̫̜͉̠̫͕̭̭̫̫̹̗̹͈̼̠̖͍͚̥͈̮̼͕̠̤̯̻̥̬̗̼̳̤̳̬̪̹͚̞̼̠͕̼̠̦͚̫͔̯̹͉͉̘͎͕̼̣̝͙̱̟̹̩̟̳̦̭͉̮̖̭̣̣̞̙̗̜̺̭̻̥͚͙̝̦̲̱͉͖͉̰̦͎̫̣̼͎͍̠̮͓̹̹͉̤̰̗̙͕͇͔̱͕̭͈̳̗̭͔̘̖̺̮̜̠͖̘͓̳͕̟̠̱̫̤͓͔̘̰̲͙͍͇̙͎̣̼̗̖͙̯͉̠̟͈͍͕̪͓̝̩̦̖̹̼̠̘̮͚̟͉̺̜͍͓̯̳̱̻͕̣̳͉̻̭̭̱͍̪̩̭̺͕̺̼̥̪͖̦̟͎̻̰_Ỏ̷͖͈̞̩͎̻̫̫̜͉̠̫͕̭̭̫̫̹̗̹͈̼̠̖͍͚̥͈̮̼͕̠̤̯̻̥̬̗̼̳̤̳̬̪̹͚̞̼̠͕̼̠̦͚̫͔̯̹͉͉̘͎͕̼̣̝͙̱̟̹̩̟̳̦̭͉̮̖̭̣̣̞̙̗̜̺̭̻̥͚͙̝̦̲̱͉͖͉̰̦͎̫̣̼͎͍̠̮͓̹̹͉̤̰̗̙͕͇͔̱͕̭͈̳̗̭͔̘̖̺̮̜̠͖̘͓̳͕̟̠̱̫̤͓͔̘̰̲͙͍͇̙͎̣̼̗̖͙̯͉̠̟͈͍͕̪͓̝̩̦̖̹̼̠̘̮͚̟͉̺̜͍͓̯̳̱̻͕̣̳͉̻̭̭̱͍̪̩̭̺͕̺̼̥̪͖̦̟͎̻̰
409 | ヽ(`д´)ノ
410 | ლ(╯⊙ε⊙ლ╰)
411 | (੭ु´ ᐜ `)੭ु⁾⁾
412 | (๑´ڡ`๑)
413 | ฅ●ω●ฅ
414 | ( ̄ー ̄)
415 | (  ̄□ ̄)/ 敬禮!! <( ̄ㄧ ̄ ) <( ̄ㄧ ̄ )
416 | (`┌_┐´)
417 | ↖╭╰('(Ω)')╯
418 | (‾◡◝ )
419 | (((m -_-)m
420 | (ㄨ‵◇′)
421 | ( ◕‿‿◕ )
422 | (*☉౪⊙*)
423 | ( ´☣///_ゝ///☣`)
424 | Σ┗(@ロ@;)┛
425 | ○(#‵︿′ㄨ)○
426 | (◞≼●≽◟◞౪◟◞≼●≽◟)
427 | ( *¯ ³¯*)♡ㄘゅ
428 | ╰( ̄︶ ̄)╯
429 | (*´艸`*)
430 | 凸(▔︵▔#)
431 | ヽ(・×・´)ゞ
432 | Σ(lliд゚ノ)ノ
433 | (つд⊂)
434 | (//´/◒/`//)
435 | ( ^ิ౪^ิ)
436 | \(Π◇Π)
437 | >///<
438 | ( ´(00)`)
439 | (✧≖‿ゝ≖)
440 | (☍﹏⁰)
441 | (・∀・)ノ三G[__]コ
442 | (╬ಠิ益ಠิ)
443 | ⊂(°Д°⊂⌒`つ≡≡≡(´⌒;;;≡≡≡
444 | ლ(இ e இ`。ლ)
445 | (〒﹏〒)
446 | ( ゚∀゚)o彡゚
447 | |Д`)ノ⌒●~*
448 | (´>∀)人(´・ω・)ノヽ(・ε・*)人(-д-`)
449 | Σ(゚ω゚)
450 | ヾ(●゜▽゜●)♡
451 | \( ̄◇ ̄)/
452 | (o´罒`o)
453 | ╮(╯~╰" )╭
454 | (σ゚∀゚)σ゚∀゚)σ゚∀゚)σ
455 | (´-`).。oO
456 | (ノ∀`*)
457 | (๑≖ิิ益≖ิ๑)
458 | ☝( ◠‿◠ )☝
459 | Σ( ° △ °)
460 | (Π___Π)
461 | \(●´ϖ`●)/
462 | ε(*´・∀・`)з゙
463 | (⁰⊖⁰)
464 | (#゚⊿`)凸
465 | (๑ơ ₃ ơ)♥
466 | (́=◞౪◟=‵)
467 | ╰(‵□′)╯
468 | (-`ェ´-╬)
469 | どこ━━━━(゜∀゜三゜∀゜)━━━━!!??
470 | (;´༎ຶД༎ຶ`)
471 | (。>﹏<)哈啾
472 | ლ(^ω^ლ)
473 | 川‧◇‧川
474 | (╯◕◞౪◟◕‵)╯︵ ┴─┴
475 | ( ゚ Χ ゚)
476 | (っ●ω●)っ
477 | (╬゚◥益◤゚)
478 | ٩(๑•̀ω•́๑)۶
479 | ↁ_ↁ
480 | il||li _| ̄|○ヽ(・ω・`)
481 | (´A`。)
482 | =(─(oo)─)=
483 | (♛‿♛)
484 | ʅ(´◔౪◔)ʃ
485 | m9(^Д^)プギャー
486 | (ÒܫÓױ)
487 | ╮(′~‵〞)╭
488 | (☄◣ω◢)☄
489 | ヽ(`Д´)ノ
490 | (╬゚ ◣ ゚)
491 | (σ゚∀゚)σ..:*☆
492 | (◔౪◔)
493 | (#`皿´)
494 | (。◕∀◕。)
495 | ヾ(◎´・ω・`)ノ
496 | ತಎತ
497 | (๑´ㅁ`)
498 | (´・Å・`)
499 | (((゚д゚)))
500 | ( ´థ౪థ)σ
501 | ─=≡Σ((( つ•̀ω•́)つ
502 | (´;ω;`)ヾ(・∀・`)
503 | (つ> _◕)つ︻╦̵̵͇̿̿̿̿╤───
504 | (´///☁///`)
505 | ε٩(๑> ₃ <)۶з
506 | Σ(*゚д゚ノ)ノ
507 | (゚д゚≡゚д゚)
508 | (́இ◞౪◟இ‵)
509 | ⇎_⇎
510 | (゚3゚)~♪
511 | |||||╭(_◇_)╮|||||
512 | (✘﹏✘ა)
513 | (´=ิ益=ิ`)
514 | ლ(•̀ _ •́ ლ)
515 | (*^﹏^*)
516 | (❁´ω`❁)*✲゚*
517 | (っ´ω`c)
518 | ☆^(o´Ф∇Ф)o
519 | レ(゚∀゚;)ヘ=З=З=З
520 | (*^ー^)
521 | (⊜‿⊜✴)
522 | (╯^◇^)╯
523 | ლ(◉◞౪◟◉ )ლ
524 | இдஇ
525 | ( *‵Д′)--●(#)‵⊿′)╮
526 | \( ̄ ◇,  ̄)/
527 | (ㆆᴗㆆ)
528 | (*゚ー゚)
529 | ( `皿´)ノ"鹵=巛巛巛巛.:;:
530 | (〃 ̄ω ̄)人( ̄︶ ̄〃)
531 | σ`∀´)σ
532 | -{}@{}@{}-υ( ̄◇ ̄*)
533 | (・`ω´・)
534 | (//´◜◞⊜◟◝`////)
535 | (≧∀≦)
536 | (;´д`).。oO(・・・・)
537 | ∑( ̄□ ̄;)
538 | (ง๑ •̀_•́)ง
539 | (;´゚ω゚`人)
540 | ╮/(>▽<)人(>▽<)╭
541 | ╩══╩ ~ ╰( ̄△ ̄╰)
542 | (╭ ̄3 ̄)╭♡
543 | ╮(﹀ω﹀")╭
544 | ( ՞ټ՞)
545 | _(┐「ε:)_
546 | ((((<||_◇_>
547 |  ̄﹃ ̄
548 | #゚Å゚)⊂彡☆))゚Д゚)・∵
549 | ( ▔︿▔)╭~
550 | ( ¯•ω•¯ )
551 | |||╭(_◇_)╮|||
552 | ( ゚Å゚)
553 | Σ(゚д゚lll)
554 | (´゚д゚`)
555 | Σ(・ω・ノ)ノ
556 | ლ(•ω •ლ)
557 | ^(˙oo˙)^
558 | ヽ(✿゚▽゚)ノ
559 | (´∩ω∩`)
560 | (*^@^*)
561 | (・ε・)
562 | ლↂ‿‿ↂლ
563 | (´◉‿◉`)
564 | ◔"L__◔
565 | ◝( ゚∀ ゚ )◟
566 | q(〒□〒)p
567 | ◢▆▅▄▃崩╰(〒皿〒)╯潰▃▄▅▇◣
568 | ◢▆▅▄▃ 崩╰(〒皿〒)╯潰 ▃▄▅▆◣
569 | ε≡ヘ( ´∀`)ノ
570 | (;̦̦̦̦̦̦̦̦ↂ⃝⃓⃙⃚⃘дↂ⃝⃓⃙⃚⃘;̦̦̦̦̦̦̦̦)
571 | (´σ-`)
572 | (゚皿゚メ)
573 | (^ρ^)/
574 | 。・゚・(ノД`)ヽ(゚Д゚ )秀秀
575 | ( ఠൠఠ )ノ
576 | _:(´□`」 ∠):_
577 | <(‵□′)>
578 | ლ(´∀`ლ)
579 | (✺ω✺)
580 | (๑•ૅω•´๑)
581 | ┌(┌^o^)┐
582 | (́순◞౪◟순‵)
583 | (# ̄▽ ̄)o╭╯ ★
584 | Σ(`L_` )
585 | ^( ̄(oo) ̄)^
586 | ლ(^o^ლ)
587 | (/‵Д′)/~ ╧╧
588 | (*´Д`)つ))´∀`)
589 | ( ☉_☉)≡☞o────★°
590 | ლ(́◉◞౪◟◉‵ლ)
591 | ┏┛學校┗┓λλλλλ
592 | (*´д`)
593 | (V)(́◉◞౪◟◉‵)(V)
594 | Σ Σ Σ (」○ ω○ )/
595 | ヾ(´ε`ヾ)
596 | 0(:3 )~ ('、3_ヽ)_
597 | =└(┐卍^o^)卍
598 | (;゚д゚)
599 | (*ˇωˇ*人)
600 | ( ♥д♥)
601 | ☮▁▂▃▄☾ ♛ ◡ ♛ ☽▄▃▂▁☮
602 | /(′□‵)\
603 | ( º﹃º )
604 | ( ´థ,_‥థ`)b
605 | ( Φ ω Φ )
606 | థ౪థ
607 | (*-*)y~
608 | (゚∀゚)
609 | |柱|ω・`)
610 | (́つ◞౪◟⊂‵)
611 | ( ´-ω ・)▄︻┻┳══━
612 | \(●o○;)ノ
613 | >(˙ω˙)<
614 | d(d'∀')
615 | (´^ิ益^ิ` )
616 | ( ´・ω)
617 | =_=
618 | \( ̄□ ̄)/
619 | (∂ω∂)
620 | (థฺˇ౪ˇథ)
621 | (✪ω✪)
622 | (σ ─.─)σ
623 | └|‧◇‧|┐
624 | |////| ( )ノ |////|(自動門
625 | 〠_〠
626 | (`皿´)
627 | 。゜+.(人-ω◕)゜+.゜
628 | ╮( ̄◇ ̄)╭
629 | _(√ ζ ε:)_
630 | (´ー`)y─┛
631 | ༼ つ/̵͇̿̿/’̿’̿ ̿ ̿̿ ̿̿◕ _◕ ༽つ/̵͇̿̿/’̿’̿ ̿ ̿̿ ̿̿ ̿̿
632 | (`д´)
633 | (ノДT)
634 | (¯ □ ¯)
635 | /(╯▽╰")\
636 | (ノД`)
637 | ( ◜◡‾)
638 | o(─//////─)o
639 | (:3っ)へ ヽ(´Д`●ヽ)
640 | (#‵)3′)▂▂▂▃▄▅~~~嗡嗡嗡嗡嗡
641 | ☆⌒(*^-゜)v
642 | ╮( ̄(oo) ̄)╭
643 | (´≖◞౪◟≖)
644 | (´∀`∩)↑↑
645 | ( ˘•ω•˘ )◞⁽˙³˙⁾
646 | ( ×ω× )
647 | (#`Д´)ノ
648 | ლ(❛◡❛✿)ლ
649 | -//(ǒ.ǒ)//-
650 | (́◉◞౪◟◉‵)
651 | ━( ิ◕㉨◕ ิ)━
652 | ╮( ̄□ ̄)╭
653 | (́一◞౪◟一‵)
654 | ヾ(;゚(OO)゚)ノ
655 | q( ̄ε ̄ )p
656 | (́◕◞ㆀ◟◕`)
657 | ╮(╯▽╰)╭
658 | ⊙谷⊙
659 | (๑థิ益థิ)
660 | <(‵△′)>
661 | ლ(゚д゚ლ)
662 | (╬⓪益⓪)
663 | ♥(´∀` )人
664 | ∪( ̄﹁ ̄)∪
665 | (・ิ㉨・ิ)
666 | (  ̄ 3 ̄)y▂ξ
667 | ┏((= ̄(エ) ̄=))┛
668 | ≧〔゜゜〕≦
669 | =͟͟͞͞( •̀д•́)
670 | (,_ゝ۞)
671 | (つД`)
672 | \( ̄□ ̄*)/
673 | (*─︵─)====Co( 一//////一)o
674 | \(⊙O⊙)/
675 | σ(‥)
676 | 。゚(゚´ω`゚)゚。
677 | ( ´∀`)
678 | (ゝ∀・)⌒☆
679 | (`3´)
680 | ( ˙灬˙ )
681 | (o゚ω゚o)
682 | (*゜ー゜)b
683 | ╰(〒○〒)╯
684 | (っ﹏-) .。o
685 | ✧(순_순)"
686 | ╯-____-)╯~═╩════╩═
687 | (●`・(エ)・´●)
688 | (。◇。||)
689 | ╮(╯_╰)╭
690 | Σヽ(゚Д ゚; )ノ
691 | ( ΄◞ิ .̫.̫ ◟ิ‵)y
692 | (#▔皿▔)─<(*—_____–)
693 | ( っธิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิิ์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์์ლ)
694 | (。´∀`)ノ―⊂ZZZ⊃
695 | ( ิΘ㉨Θ)ิ
696 | (´・д・`)ハ(・д・`*)
697 | (╯°O°)╯┻━┻
698 | σ ゚∀ ゚) ゚∀゚)σ
699 | (⌒o⌒)
700 | (´-ι_-`)
701 | ( ・O・)ノ⌒●
702 | *ଘ(੭*ˊᵕˋ)੭* ੈ✩‧₊˚
703 | (*´∀`)~♥
704 | (,,・ω・,,)
705 | (゜o゜(☆○=(-_- )
706 | ( ^ω^)
707 | (〒__〒)
708 | ┏(_д_┏)┓))
709 | (◓Д◒)✄╰⋃╯
710 | ヾ(;゚;Д;゚;)ノ゙
711 | (д) ゚゚
712 | | 柱 |┐( ̄◇ ̄*)
713 | o(.〝.)o
714 | (ㄒ◇ㄒ)o~
715 | ก็ʕ•͡ᴥ•ʔ ก้
716 | <(‵︵′)>
717 | (╯#▔__▔)╯┴┴\(" ̄□ ̄)
718 | ﹏﹏(ㄏ ̄△ ̄)ㄏ
719 | ( ˘・з・)
720 | (╯°▽°)╯ ┻━┻
721 | \(" ̄▽ ̄)/
722 | (•ө•)
723 | ~( ̄▽ ̄)~
724 | ( ิ◕㉨◕ ิ)
725 | ┴┴~╰(‵□′#)
726 | (:3[__]4
727 | (›´ω`‹ )
728 | ( ´Д`)y━・~~
729 | ◑▂◐
730 | (°ロ°٥)
731 | (・∀・)
732 | (。ŏ_ŏ)
733 | (ノ>ω<)ノ
734 | ( ゚∀゚) ノ♡
735 | ヽ(●´ε`●)ノ
736 | •✞_✞•
737 | (ゝ∀・)b
738 | (๑• . •๑)
739 | ◉ื益◉ื
740 | (╯ ‵□′)╯
741 | (´;;◉;⊖;◉;;`)
742 | (。•ㅅ•。)♡
743 | (ㅅ˘ㅂ˘)
744 | ٩(๑´3`๑)۶
745 | (´ΘωΘ`)
746 | (≖‿ゝ≖)✧
747 | ( ̄y▽ ̄)╭
748 | ,,Ծ‸Ծ,,
749 | σ(´∀`*)
750 | (´▽`ʃ♡ƪ)
751 | O-(/// ̄皿 ̄)☞ ─═≡☆゜★█▇▆▅▄▃▂_
752 | ʕ•̫͡•ʔ
753 | o(‧""‧)o
754 | (`∀´)
755 | ⁽⁽ ◟(∗ ˊωˋ ∗)◞ ⁾⁾
756 | (◍•ᴗ•◍)ゝ
757 | (*’ー’*)
758 | ( σ՞ਊ ՞)σ
759 | ( ˘•ω•˘ ).oOஇ
760 | ( ̄□ ̄||)
761 | (●` 艸 ´)
762 | (ಥ_ಥ)
763 | ⋉(● ∸ ●)⋊
764 | ✧*。٩(ˊᗜˋ*)و✧*。
765 | ( ´•̥̥̥ω•̥̥̥` )
766 | \(^ω^)/
767 | 癶(癶;:゜;益;゜;)癶
768 | (●´ω`●)ゞ
769 | (o゚ロ゚)┌┛Σ(ノ´*ω*`)ノ
770 | \(╯﹏╰)
771 | |柱|ω゚)---c<` д゚)!
772 | (๑´ㅂ`๑)
773 | (╯‵□′)╯︵┴─┴
774 | ╮(▔ε▔)╭
775 | 凸( ̄皿 ̄*)
776 | (゚々。)
777 | ヾ(>Д<;)))).....
778 | ヾ(:3ノシヾ)ノシ 三[____]
779 | (# ̄▽ ̄#)
780 | <( ̄︶ ̄)/
781 | <( ̄3 ̄)>
782 | づ(・ω・)づ
783 | い・ω・づ*
--------------------------------------------------------------------------------
/jseg/emodet.py:
--------------------------------------------------------------------------------
1 | # -*-coding:utf-8-*-
2 | from os.path import abspath, dirname, join
3 |
4 | CUR_PATH = dirname(abspath(__file__))
5 |
6 |
def load_emo():
    """Load the list of known emoticons shipped with the package.

    Reads ``emoall.txt`` (UTF-8, one emoticon per line) from ``CUR_PATH``
    and adds a handful of hard-coded short emoticons.

    Returns:
        list: unique, stripped, non-empty emoticon strings in arbitrary
        order.
    """
    with open(join(CUR_PATH, 'emoall.txt'), encoding="utf-8") as f:
        lines = f.read().split('\n')
    emo_ext = [u'= =', u'=.=', u'=_=', u'>///<', u'> <', u'orz', u'^ ^', u'XD', u'(′・_・`)', u'-_-||', u'>"<', u'T.T']
    # Strip before de-duplicating so that entries differing only in
    # surrounding whitespace collapse into one.
    emos = {i.strip() for i in lines + emo_ext}
    # An empty string is a substring of every text; a blank (or trailing)
    # line in the data file must therefore never be reported as an emoticon.
    emos.discard('')
    return list(emos)
17 |
18 |
def find_emo(txt):
    """Return every known emoticon that occurs as a substring of *txt*.

    The emoticon list is loaded with ``load_emo`` on the first call and
    cached on the function object, so the data file is not re-read for
    every text.

    :param: txt: text to scan
    :return: list of matching emoticons (order follows the cached list)
    """
    emos = getattr(find_emo, '_emos', None)
    if emos is None:
        # Drop empty entries defensively: '' is contained in every string
        # and would make callers replace between every character.
        emos = find_emo._emos = [i for i in load_emo() if i]
    return [i for i in emos if i in txt]
23 |
--------------------------------------------------------------------------------
/jseg/jieba.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """This is a modified version of "jieba" Chinese segmentator.
3 |
4 | All credit goes to fxsjy/jieba.
5 | Find more on: https://github.com/fxsjy/jieba
6 | """
7 | from multiprocessing import Pool, cpu_count
8 | from functools import partial
9 | from os.path import dirname, abspath, join
10 | from string import punctuation
11 | from math import log
12 | import pickle
13 | import json
14 | import re
15 | import logging
16 |
17 | import six
18 |
19 | from .emodet import find_emo
20 |
21 | logging.basicConfig(level=logging.DEBUG)
22 | logger = logging.getLogger(__name__)
23 |
24 |
25 | CUR_PATH = dirname(abspath(__file__))
26 | with open(join(CUR_PATH, 'brill_tagger.pkl'), 'rb') as f:
27 | BrillTagger = pickle.load(f)
28 |
29 |
class Hmm:
    """Hidden Markov Model over the character states B, M, E and S."""

    def __init__(self):
        """Load the initial, transition and emission log-probabilities.

        The tables are read from ``prob/initP.json``, ``prob/tranP.json``
        and ``prob/emisP.json`` next to this module.
        """
        # Log-probability used for anything missing from the tables.
        self._min_float = -3.14e100
        # For each state, the states that are allowed to precede it.
        self._prev_status = {
            'B': ('E', 'S'),
            'M': ('M', 'B'),
            'S': ('S', 'E'),
            'E': ('B', 'M')
        }

        self._prob = dict()
        for prob in ('initP', 'tranP', 'emisP'):
            path = join(CUR_PATH, 'prob', prob + '.json')
            # Explicit encoding: do not depend on the platform default.
            with open(path, encoding="utf-8") as jf:
                self._prob[prob] = json.load(jf)

    def _viterbi(self, obs, states, start_p, trans_p, emit_p):
        """Return ``(log_prob, state_path)`` of the best path for *obs*.

        *obs* must be non-empty.  The path is forced to end in state ``E``
        or ``S``.
        """
        _v = [{}]  # _v[t][state] -> best log-probability ending in state
        path = {}
        for y in states:  # init
            _v[0][y] = start_p[y] + emit_p[y].get(obs[0], self._min_float)
            path[y] = [y]
        for t in range(1, len(obs)):
            _v.append({})
            newpath = {}
            for y in states:
                em_p = emit_p[y].get(obs[t], self._min_float)
                (prob, state) = max(
                    (
                        _v[t - 1][y0] +
                        trans_p[y0].get(y, self._min_float) +
                        em_p, y0
                    )
                    for y0 in self._prev_status[y]
                )
                _v[t][y] = prob
                newpath[y] = path[state] + [y]
            path = newpath

        (prob, state) = max((_v[len(obs) - 1][y], y) for y in ('E', 'S'))

        return (prob, path[state])

    def hmm_seg(self, sentence):
        """Segment *sentence* with the HMM.

        :param: sentence: string to be segmented
        :return: list of words; empty for an empty sentence
        """
        if not sentence:
            # _viterbi indexes obs[0] and cannot handle empty input.
            return []
        con = []
        _, pos_list = self._viterbi(
            sentence, ('B', 'M', 'E', 'S'),
            self._prob['initP'],
            self._prob['tranP'],
            self._prob['emisP']
        )
        begin, next_start = 0, 0
        for i, char in enumerate(sentence):
            pos = pos_list[i]
            if pos == 'B':
                begin = i
            elif pos == 'E':
                con.append(sentence[begin:i + 1])
                next_start = i + 1
            elif pos == 'S':
                con.append(char)
                next_start = i + 1
        # Keep whatever follows the last completed word as one token.
        if next_start < len(sentence):
            con.append(sentence[next_start:])
        return con
101 |
102 |
class Jieba(Hmm):
    """Dictionary + HMM based Chinese word segmenter with optional POS tags.

    Emoticons (found by ``find_emo``) and user supplied "guaranteed" words
    are replaced by ``_emoN@`` / ``_gwN@`` placeholders before segmentation
    so that each of them comes out as a single token.
    """

    def __init__(self, load_ptt_dict=False):
        """Build the dictionary trie and optionally load the PTT word list.

        :param: load_ptt_dict: load PTT dictionary (default: False)
        """
        Hmm.__init__(self)
        self._freq = {}
        self._trie = {}
        self._min_freq = None
        # word -> placeholder and placeholder -> word
        self._gw, self._gwr = {}, {}
        self._gw_num = 0

        self._gen_trie()

        if load_ptt_dict:
            logger.debug('loading ptt dictionary')
            with open(join(CUR_PATH, "ptt_encyc.txt"), encoding="utf-8") as tf:
                raw = tf.read()
            guarantee_wlst = filter(None, raw.split('\n'))
            self.add_guaranteed_wordlist(guarantee_wlst)

    def _load_dic(self):
        """Return the non-empty lines of ``dict.txt`` as a list."""
        logger.debug('loading default dictionary')
        with open(join(CUR_PATH, "dict.txt"), encoding="utf-8") as tf:
            raw = tf.read()
        # NOTE: the dictionary is expected to contain Chinese entries only.
        return [line for line in re.split('\r{0,1}\n', raw) if line]

    def add_guaranteed_wordlist(self, lst=None):
        """User-defined dictionary.

        Every word is given a unique ``_gwN@`` placeholder so that ``seg``
        returns it as a single token.

        :param: lst: list of words (``None`` is treated as empty).
        """
        for word in lst or ():
            if not word:
                # '' would match at every position in ``str.replace``.
                continue
            self._gw_num += 1
            placeholder = '_gw' + str(self._gw_num) + '@'
            self._gw[word] = placeholder
            self._gwr[placeholder] = word

    def _gen_trie(self):
        """Fill the prefix trie and the log-frequency table."""
        total = 0.0
        for item in self._load_dic():
            word, freq = item.split(' ')
            freq = float(freq)
            self._freq[word] = freq
            total += freq
            p = self._trie
            for char in word:
                if char not in p:
                    p[char] = {}
                p = p[char]
            p[''] = ''  # ending flag

        self._freq = {w: log(float(c) / total) for w, c in self._freq.items()}
        self._min_freq = min(self._freq.values())

    def _get_dag(self, sentence):
        """Map each start index to the end indices of dictionary words."""
        senlen = len(sentence)
        i, j = 0, 0
        p = self._trie
        dag = {}
        while i < senlen:
            c = sentence[j]
            if c in p:
                p = p[c]
                if '' in p:
                    dag.setdefault(i, []).append(j)
                j += 1
                if j >= senlen:
                    i += 1
                    j = i
                    p = self._trie
            else:
                p = self._trie
                i += 1
                j = i
        # A position with no dictionary word stands for itself.
        for i in range(senlen):
            if i not in dag:
                dag[i] = [i]
        return dag

    def _calc_route(self, sentence, dag, idx):
        """Return, per start index, the best ``(log_prob, end_index)``.

        *idx* is unused; it is kept so the signature stays unchanged.
        """
        senlen = len(sentence)
        route = {senlen: (0.0, '')}
        # Dynamic programming from the end of the sentence to its start.
        for start in range(senlen - 1, -1, -1):
            route[start] = max(
                (
                    self._freq.get(
                        sentence[start:x + 1], self._min_freq
                    ) + route[x + 1][0],
                    x
                )
                for x in dag[start]
            )
        return route

    def _flush_buf(self, buf, out):
        """Append the pending run of single characters *buf* to *out*."""
        if len(buf) == 1:
            out.append(buf)
        elif buf not in self._freq:
            # Unknown run of characters: let the HMM find words in it.
            out.extend(self.hmm_seg(buf))
        else:
            out.extend(buf)  # one token per character

    def _seg(self, sentence):  # cut DAG or hmm seg
        """Segment one run of Chinese text / placeholder."""
        dag = self._get_dag(sentence)
        route = self._calc_route(sentence, dag, 0)
        dag_con = []
        buf = u''
        x, _n = 0, len(sentence)
        while x < _n:
            y = route[x][1] + 1
            l_word = sentence[x:y]
            if y - x == 1:
                # Collect consecutive single characters for later handling.
                buf += l_word
            else:
                if buf:
                    self._flush_buf(buf, dag_con)
                    buf = u''
                dag_con.append(l_word)
            x = y

        if buf:
            self._flush_buf(buf, dag_con)
        return dag_con

    def seg(self, text, pos=False):
        """Segment *text* into words.

        :param: text: text to be segmented
        :param: pos: show POS tagging (default: False)
        :return: with ``pos=False`` a tuple of tokens, with ``pos=True`` a
            list of ``(token, tag)`` pairs; empty when no token is produced
            (e.g. for empty text)
        """
        # Protect emoticons with placeholders.  Longest first (ties broken
        # by the string itself) so that a short emoticon cannot break up a
        # longer one containing it and the result is deterministic.
        emocan_r = {}
        emos = sorted(set(find_emo(text)), key=lambda e: (-len(e), e))
        for num, emo in enumerate(emos):
            if not emo:
                continue  # '' would match between every character
            placeholder = '_emo' + str(num) + '@'
            emocan_r[placeholder] = emo
            text = text.replace(emo, placeholder)

        # Protect guaranteed words the same way, longest words first.
        for gw in sorted(self._gw, key=len, reverse=True):
            if gw in text:
                text = text.replace(gw, self._gw[gw])

        re_han = re.compile(
            r"(http://.*?\s|_gw\d+?@|_emo\d+?@|[ㄅ-ㄩ一-龥]+)", re.U
        )
        re_eng = re.compile(r'[a-zA-Z]+')
        re_neu = re.compile(r'[0-9]+')
        re_url = re.compile(r'(http://.*?)[\s\n\r\n]')
        re_skip = re.compile(r"(\r{0,1}\n|\s)", re.U)

        puns = punctuation
        puns += ''.join([six.unichr(ord(i) + 65248)
                         for i in puns])  # full-width punctuations
        puns += u'、。「」…“”'  # a few more common punctuation marks
        # re.escape keeps ']', '\\', '^' and '-' literal inside the class;
        # unescaped, the backslash was consumed as an escape for ']' and
        # therefore never treated as punctuation.
        re_pun = re.compile(u'(\r{0,1}\n|\\s|[%s])' % re.escape(puns))

        con = []
        for blk in re_han.split(text):
            url_match = re_url.match(blk)
            # Exact placeholder lookups: a prefix test would raise KeyError
            # for ordinary text that merely starts with '_gw' or '_emo'.
            if blk in self._gwr:
                word = self._gwr[blk]
                logger.debug('find in dict: %s', word)
                con.append((word, None))
            elif blk in emocan_r:
                con.append((emocan_r[blk], 'EMOTICON'))
            elif url_match:
                con.append((url_match.group(1), 'URL'))
            elif re_han.match(blk):
                for word in self._seg(blk):
                    con.append((word, None))
            else:
                for x in re_pun.split(blk):
                    if x != ' ' and x != '':
                        if re_skip.match(x):
                            con.append(('\n', 'LINEBREAK'))
                        else:
                            con.append((x, None))

        if not con:
            # zip(*[]) cannot be unpacked into two names.
            return [] if pos else ()

        con_w, con_p1 = zip(*con)
        if not pos:
            return con_w

        output = []
        for (word_fin, pos_fin), pos_p1 in zip(BrillTagger.tag(con_w), con_p1):
            if pos_fin == '-None-':
                if re_pun.match(word_fin):
                    pos_fin = 'PUNCTUATION'
                elif re_eng.match(word_fin):
                    pos_fin = 'FW'
                elif re_neu.match(word_fin):
                    pos_fin = 'Neu'
            # A tag assigned during splitting (EMOTICON, URL, LINEBREAK)
            # takes precedence over the tagger's result.
            output.append((word_fin, pos_fin if pos_p1 is None else pos_p1))
        return output

    def _seg_multi(self, texts, pos=False, pool_size=None):
        """Use `multiprocessing` to segment several texts.

        Notice that this is an experimental method, which seems
        to work better with Python3.3+
        :param: texts: list of text
        :param: pos: show POS tags (default: False)
        :param: pool_size: pool size of `multiprocessing`
        """
        import sys
        # ``Pool`` is used as a context manager below.
        assert (sys.version_info.major == 3 and sys.version_info.minor >= 3)
        with Pool(pool_size or cpu_count()) as pool:
            output = pool.map(partial(self.seg, pos=pos), texts)

        return output
375 |
376 |
377 | # TODO
378 | # add construction detection
379 | # 標點符號分割?
380 | # backslash 可能有問題
381 |
--------------------------------------------------------------------------------
/jseg/prob/initP.json:
--------------------------------------------------------------------------------
1 | {"M": -3.14e+100, "S": -1.2051185781089053, "B": -0.35618429962370884, "E": -3.14e+100}
--------------------------------------------------------------------------------
/jseg/prob/tranP.json:
--------------------------------------------------------------------------------
1 | {"M": {"M": -1.2603623820268226, "E": -0.33344856811948514}, "S": {"S": -0.6658631448798212, "B": -0.7211965654669841}, "B": {"M": -0.916290731874155, "E": -0.51082562376599}, "E": {"S": -0.8085250474669937, "B": -0.5897149736854513}}
--------------------------------------------------------------------------------
/jseg/ptt_encyc.txt:
--------------------------------------------------------------------------------
1 | 幫蓋
2 | 筆記
3 | 崩潰
4 | 北車
5 | 撥接魂
6 | 幫高調
7 | 八方雲集
8 | 笨舨板規定九
9 | 不要問!很恐怖!
10 | 不要問!很恐怖!
11 | 脾氣不好
12 | 沒空
13 | 墨鏡
14 | 媽!我在這
15 | 媽!我在這
16 | 馬路三寶
17 | 沒圖沒真相
18 | 苗栗小五郎
19 | 富奸
20 | ㄈㄈ尺
21 | 反指標
22 | 反詐騙
23 | 反串文
24 | 費雯
25 | 飛鴿傳書
26 | 放大絕
27 | 廢文大賽
28 | 分手擂台
29 | 發文不附圖,此風不可長
30 | 發文不附圖,此風不可長
31 | 大大
32 | 電梯向下
33 | 低調
34 | 打臉
35 | 釣魚文
36 | 地震文
37 | 道歉文
38 | 丹丹之亂
39 | 大同電鍋
40 | 淡定哥
41 | 淡定紅茶
42 | 淡淡的哀傷
43 | 蛋蛋的哀傷
44 | 大師兄黎明
45 | 頭推
46 | 推齊
47 | 台肯
48 | 臺肯
49 | 天龍人
50 | 天龍國
51 | 她,我可以
52 | 她,我可以
53 | 推文朝聖
54 | 鐵手套與藏獒
55 | 天大地大台科大
56 | 腿了
57 | 捏他
58 | 你誰啊,亂公告
59 | 你誰啊,亂公告
60 | 你我都推了一把
61 | 你是在大聲什麼啦
62 | 你嗎知道你在這裡發廢文嗎?
63 | 你嗎知道你在這裡發廢文嗎?
64 | 娘子,快和牛魔王出來看上帝
65 | 你好糟糕,請暫時不要和我說話
66 | 樓層
67 | 綠爆
68 | 藍爆
69 | 魯蛇
70 | 懶人包
71 | 聯合報
72 | 聯合重工
73 | 來大都找我吧
74 | 李組長眉頭一皺,發覺案情並不單純
75 | 李組長眉頭一皺,發覺案情並不單純
76 | 關燈
77 | 肛肛
78 | 肛肛好
79 | 關鍵字問卦
80 | 公海到了沒?
81 | 給開司一罐啤酒
82 | 跟鄉民認真就輸了
83 | 趕快推,不然別人會以為我們看不懂
84 | 趕快推,不然別人會以為我們看不懂
85 | 開燈
86 | 科科
87 | 可魯
88 | 空白文
89 | 卡卡獸
90 | 塊陶啊
91 | 靠么!圖咧!
92 | 靠么!圖咧!
93 | 恐怖連結
94 | 看笨版長知識
95 | 黃爆
96 | 厚問
97 | 好兇
98 | 華肯
99 | 紅的明顯
100 | 還好我都用pcman
101 | 還子的學習不能等
102 | 鹼民
103 | 頸推
104 | 甲甲
105 | 祭品文
106 | 集氣文
107 | 金排球
108 | 救援成功
109 | 鍵盤柯南
110 | 鍵盤小妹
111 | 接龍推文
112 | 記者快來抄
113 | 借轉可愛版
114 | 雞排珍奶板凳
115 | 欺負長ID
116 | 青藍爆
117 | 蛆蛆
118 | 搶頭香
119 | 強暴了
120 | 強者我朋友
121 | 鄉民
122 | 胸推
123 | 匈奴
124 | 瞎了
125 | 宵夜文
126 | 新警察
127 | 下手不知輕重
128 | 謝謝你9257
129 | 先承認你就是你朋友
130 | 先別說這個了,你聽過安麗嗎?
131 | 先別說這個了,你聽過安麗嗎?
132 | 喜歡你溫暖的文字,洋溢著滿滿的幸福
133 | 喜歡你溫暖的文字,洋溢著滿滿的幸福
134 | 吱吱
135 | 中肯
136 | 炸裂
137 | 週經文
138 | 肘擊她奶子
139 | 這就是人參啊
140 | 正常能量釋放
141 | 這是什麼巫術
142 | 直到我的膝蓋中了一箭
143 | 踹共
144 | 趁亂告白
145 | 出來面對
146 | 吃鍋貼
147 | 純推不下
148 | 神人
149 | 閃光
150 | 舒服文
151 | 十里坡劍神
152 | 數學老ㄙ常請假
153 | 日經文
154 | 人肉搜索
155 | 讓他生讓他生
156 | 讓他爆
157 | 讓他昇
158 | 紫爆
159 | 自介文
160 | 在我床上
161 | 在一起~在一起~
162 | 在一起~在一起~
163 | 左邊是超大型巨人,右邊是鎧之巨人
164 | 左邊是超大型巨人,右邊是鎧之巨人
165 | 藏頭文
166 | 酸民
167 | 四海遊龍
168 | 三重劉德華
169 | 喔~喔喔~喔喔~喔喔~爪爪
170 | 喔~喔喔~喔喔~喔喔~爪爪
171 | 養ID
172 | 一行文
173 | 葉佩雯
174 | 顏色不對
175 | 顏色對了
176 | 有下有推
177 | 雅量背影出師表
178 | 有神快拜
179 | 五樓
180 | 溫馨
181 | 溫拿
182 | 我難過
183 | 為看先猜
184 | 勿忘督割
185 | 萬用三熊圖
186 | 我開始相信你了
187 | 萬人響應,一人到場
188 | 萬人響應,一人到場
189 | 月經文
190 | 雨八令刀
191 | 原po是正妹
192 | 56家族
193 | 在你後面,他非常火
194 | 在你後面,他非常火
195 | +1
196 | 689
197 | 609
198 | 30cm
199 | BJ4
200 | bj4
201 | CD中
202 | END
203 | GG
204 | lag了
205 | LUB
206 | OP
207 | puma
--------------------------------------------------------------------------------
/jseg/test.py:
--------------------------------------------------------------------------------
1 | # -*-coding:utf-8-*-
2 | from jieba import Jieba
3 | import multi
4 | import time
5 | import logging
6 | logger = logging.getLogger(__name__)
7 |
8 |
9 | text = '''網友瘋傳高市新興分局發給各單位協助慈濟骨髓捐贈者戶籍查訪「交辦單」,但不是說「警務大瘦身、大砍20項業務嗎?」引發基層員警議論紛紛;新興分局表示,交辦時間是17日,在警政署20日宣布簡化警察協辦業務前,未來對於類似服務需求將會視個案狀況審慎處理。
10 | 警政署20日公布20項簡化協辦業務,讓警察回歸專業,網路即瘋傳一張高市新興分局指派各警勤區員警,到骨髓捐贈者的戶籍地查訪、通告「交辦單」;網友們還在交辦單上打上「署長一直在減別人的爛攤子、高雄卻也是一直撿爛攤子」嘲諷引發群起議論。
11 | 不少網友留言:「這種急難救助,不是應該衛生單位訪查嗎?怎會是基層員警?」,也有人認為這太誇張,說「慈濟是收費的,不是免費幫人看病,這不是為民服務,根本是幫慈濟跑腿」;連慈濟義工都看不下,直言「這是我們慈濟人工作,不應交辦給警察,還用他們的警勤時間,做我們的事。」
12 | 新興分局指出,慈濟骨髓幹細胞中心請各縣警察局代為協尋已配對成功、但失去聯繫的骨髓幹細胞志願捐贈者,屬急難救助業務一環,盼結合警政分布系統,在最短時間內找尋該志願者,轉送通知信函,速與慈濟骨髓幹細胞中心聯絡,以辦理捐贈移植手續。
13 | 新興分局表示,尊重基層同仁想法及建議,對此類服務將儘量婉拒,但如遇到有緊急、非迅速找到配對捐贈者無法救命者,考慮人道精神,仍依個案來協助;絕不給基層員警造成任何工作負荷。'''
14 |
15 | texts = [text] * 1000
16 |
17 | j = Jieba()
18 |
19 |
def timeit(method):
    """Decorate *method* so that the duration of each call is logged.

    The elapsed wall-clock time is written to the module ``logger`` at INFO
    level; the wrapped function's return value is passed through.
    """
    from functools import wraps

    @wraps(method)  # keep the wrapped function's name and docstring
    def timed(*args, **kw):
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()
        # Lazy %-arguments: the message is only built if INFO is enabled.
        logger.info('%r %f sec', method.__name__, te - ts)
        return result

    return timed
30 |
31 |
@timeit
def single_test():
    """Segment every text in ``texts`` one after another in this process."""
    # The results are collected but not used; only the timing matters.
    con = [j.seg(doc) for doc in texts]  # noqa: F841
37 |
38 |
@timeit
def multi_test():
    """Segment ``texts`` through ``multi.seg`` with 8 as second argument."""
    workers = 8
    multi.seg(texts, workers)
42 |
43 |
44 | if __name__ == '__main__':
45 | single_test()
46 | multi_test()
47 |
--------------------------------------------------------------------------------
/pip.txt:
--------------------------------------------------------------------------------
1 | nltk==3.0.3
2 | six==1.11.0
3 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """Python setup settings."""
2 | from setuptools import setup, find_packages
3 |
# ``package_data`` lists the non-Python files that jseg opens at runtime
# from its own directory.  ``ptt_encyc.txt`` is read by
# ``Jieba(load_ptt_dict=True)`` and must therefore be installed as well.
setup(
    name='jseg',
    version='0.0.4',
    packages=find_packages(),
    install_requires=[
        'nltk==3.0.3',
        'six==1.11.0',
    ],
    author='Aji Liu',
    author_email='amigcamel@gmail.com',
    description='''A modified version of Jieba''',
    package_data={
        'jseg': [
            'prob/*',
            'dict.txt',
            'emoall.txt',
            'ptt_encyc.txt',
            'brill_tagger.pkl',
        ]
    }
)
24 |
--------------------------------------------------------------------------------