├── VERSION
├── irctokens
    ├── py.typed
    ├── const.py
    ├── __init__.py
    ├── hostmask.py
    ├── formatting.py
    ├── stateful.py
    └── line.py
├── test
    ├── __init__.py
    ├── parser_tests.py
    ├── hostmask.py
    ├── stateful_encode.py
    ├── format.py
    ├── stateful_decode.py
    ├── tokenise.py
    └── _data
    │   ├── msg-join.yaml
    │   └── msg-split.yaml
├── .travis.yml
├── setup.py
├── LICENSE
├── README.md
└── .gitignore


/VERSION:
--------------------------------------------------------------------------------
1 | 2.0.2
2 | 


--------------------------------------------------------------------------------
/irctokens/py.typed:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/irctokens/const.py:
--------------------------------------------------------------------------------
# Each pair is (raw character, its escaped form inside a message tag value).
# The backslash pair comes first: escaping walks these in order, so existing
# backslashes are doubled before the other replacements introduce new ones.
_TAG_ESCAPE_PAIRS = [
    ("\\", "\\\\"),
    (" ",  "\\s"),
    (";",  "\\:"),
    ("\r", "\\r"),
    ("\n", "\\n"),
]

# Parallel lists: TAG_ESCAPED[i] is the escaped form of TAG_UNESCAPED[i].
TAG_UNESCAPED = [raw for raw, _ in _TAG_ESCAPE_PAIRS]
TAG_ESCAPED   = [escaped for _, escaped in _TAG_ESCAPE_PAIRS]
3 | 


--------------------------------------------------------------------------------
/irctokens/__init__.py:
--------------------------------------------------------------------------------
1 | from .line     import Line, build, tokenise
2 | from .hostmask import Hostmask, hostmask
3 | from .stateful import StatefulDecoder, StatefulEncoder
4 | 
5 | 


--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
1 | from .tokenise        import *
2 | from .format          import *
3 | from .stateful_decode import *
4 | from .stateful_encode import *
5 | from .hostmask        import *
6 | from .parser_tests    import *
7 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | cache: pip
 3 | python:
 4 |   - "3.6"
 5 |   - "3.7"
 6 |   - "3.8"
 7 |   - "3.8-dev"
 8 | install:
 9 |   - pip3 install mypy pyyaml==5.3.1
10 | script:
11 |   - pip3 freeze
12 |   - mypy irctokens
13 |   - python3 -m unittest test
14 | 


--------------------------------------------------------------------------------
/irctokens/hostmask.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | 
 3 | class Hostmask(object):
 4 |     def __init__(self, source: str,
 5 |             nickname: str,
 6 |             username: Optional[str],
 7 |             hostname: Optional[str]):
 8 |         self._source = source
 9 |         self.nickname = nickname
10 |         self.username = username
11 |         self.hostname = hostname
12 | 
13 |     def __str__(self) -> str:
14 |         return self._source
15 |     def __repr__(self) -> str:
16 |         return f"Hostmask({self._source})"
17 |     def __eq__(self, other) -> bool:
18 |         if isinstance(other, Hostmask):
19 |             return str(self) == str(other)
20 |         else:
21 |             return False
22 | 
def hostmask(source: str) -> Hostmask:
    """Parse *source* into a Hostmask.

    Everything after the first "@" is the hostname; within the text before
    it, everything after the first "!" is the username and the rest is the
    nickname. An absent or empty username/hostname is stored as None.
    """
    user_part, _, host = source.partition("@")
    nick, _, user = user_part.partition("!")
    return Hostmask(
        source,
        nick,
        user if user else None,
        host if host else None)
31 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import setuptools
 2 | 
 3 | with open("README.md", "r") as fh:
 4 |     long_description = fh.read()
 5 | with open("VERSION", "r") as version_file:
 6 |     version = version_file.read().strip()
 7 | 
 8 | setuptools.setup(
 9 |     name="irctokens",
10 |     version=version,
11 |     author="jesopo",
12 |     author_email="pip@jesopo.uk",
13 |     description="RFC1459 and IRCv3 protocol tokeniser",
14 |     long_description=long_description,
15 |     long_description_content_type="text/markdown",
16 |     url="https://github.com/jesopo/irctokens",
17 |     packages=setuptools.find_packages(),
18 |     package_data={"irctokens": ["py.typed"]},
19 |     classifiers=[
20 |         "Programming Language :: Python :: 3",
21 |         "License :: OSI Approved :: MIT License",
22 |         "Operating System :: OS Independent",
23 |         "Operating System :: POSIX",
24 |         "Operating System :: Microsoft :: Windows",
25 |         "Topic :: Communications :: Chat :: Internet Relay Chat"
26 |     ],
27 |     python_requires='>=3.6'
28 | )
29 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 jesopo
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/irctokens/formatting.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, List, Optional
 2 | from .const import TAG_ESCAPED, TAG_UNESCAPED
 3 | 
 4 | def _escape_tag(value: str):
 5 |     for i, char in enumerate(TAG_UNESCAPED):
 6 |         value = value.replace(char, TAG_ESCAPED[i])
 7 |     return value
 8 | 
 9 | def format(
10 |         tags:    Optional[Dict[str, str]],
11 |         source:  Optional[str],
12 |         command: str,
13 |         params:  List[str]):
14 |     outs: List[str] = []
15 |     if tags:
16 |         tags_str = []
17 |         for key in sorted(tags.keys()):
18 |             if tags[key]:
19 |                 value = tags[key]
20 |                 tags_str.append(f"{key}={_escape_tag(value)}")
21 |             else:
22 |                 tags_str.append(key)
23 |         outs.append(f"@{';'.join(tags_str)}")
24 | 
25 |     if source is not None:
26 |         outs.append(f":{source}")
27 |     outs.append(command)
28 | 
29 |     params = params.copy()
30 |     if params:
31 |         last = params.pop(-1)
32 |         for param in params:
33 |             if " " in param:
34 |                 raise ValueError("non last params cannot have spaces")
35 |             elif param.startswith(":"):
36 |                 raise ValueError("non last params cannot start with colon")
37 |         outs.extend(params)
38 | 
39 |         if (not last or
40 |                 " " in last or
41 |                 last.startswith(":")):
42 |             last = f":{last}"
43 |         outs.append(last)
44 |     return " ".join(outs)
45 | 


--------------------------------------------------------------------------------
/test/parser_tests.py:
--------------------------------------------------------------------------------
 1 | import os.path, unittest
 2 | import yaml
 3 | import irctokens
 4 | 
 5 | # run test cases sourced from:
 6 | # https://github.com/ircdocs/parser-tests
 7 | 
# absolute directory that contains this test module
# NOTE(review): `dir` shadows the builtin of the same name; left as-is because
# module-level names here are re-exported by the package's star import
dir      = os.path.dirname(os.path.realpath(__file__))
# directory beside this module that holds the YAML fixture files
data_dir = os.path.join(dir, "_data")
10 | 
class ParserTestsSplit(unittest.TestCase):
    """Runs the YAML fixtures in the data directory against the tokeniser
    and the line builder."""

    def test_split(self):
        # every fixture input must tokenise into exactly the listed atoms
        fixture_path = os.path.join(data_dir, "msg-split.yaml")
        with open(fixture_path) as fixture:
            cases = yaml.safe_load(fixture.read())["tests"]

        for case in cases:
            raw_line = case["input"]
            expected = case["atoms"]

            parsed = irctokens.tokenise(raw_line)

            self.assertEqual(parsed.tags,    expected.get("tags", None))
            self.assertEqual(parsed.source,  expected.get("source", None))
            self.assertEqual(parsed.command, expected["verb"].upper())
            self.assertEqual(parsed.params,  expected.get("params", []))

    def test_join(self):
        # every set of atoms must format into one of the accepted lines
        fixture_path = os.path.join(data_dir, "msg-join.yaml")
        with open(fixture_path) as fixture:
            cases = yaml.safe_load(fixture.read())["tests"]

        for case in cases:
            atoms    = case["atoms"]
            accepted = case["matches"]

            built = irctokens.build(
                atoms["verb"],
                atoms.get("params", []),
                source=atoms.get("source", None),
                tags=atoms.get("tags", None))
            formatted = built.format()

            self.assertIn(formatted, accepted)
44 | 


--------------------------------------------------------------------------------
/irctokens/stateful.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Optional
 2 | from .line  import Line, tokenise
 3 | 
 4 | class StatefulDecoder(object):
 5 |     def __init__(self, encoding: str="utf8", fallback: str="latin-1"):
 6 |         self._encoding = encoding
 7 |         self._fallback = fallback
 8 |         self.clear()
 9 | 
10 |     def clear(self):
11 |         self._buffer = b""
12 | 
13 |     def pending(self) -> bytes:
14 |         return self._buffer
15 | 
16 |     def push(self, data: bytes) -> Optional[List[Line]]:
17 |         if not data:
18 |             return None
19 | 
20 |         self._buffer += data
21 |         lines_b = [l.strip(b"\r") for l in self._buffer.split(b"\n")]
22 |         self._buffer = lines_b.pop(-1)
23 | 
24 |         lines: List[Line] = []
25 |         for line in lines_b:
26 |             lines.append(tokenise(line, self._encoding, self._fallback))
27 |         return lines
28 | 
class StatefulEncoder(object):
    """Queues Lines as encoded bytes and reports which ones have been sent."""

    def __init__(self, encoding: str="utf8"):
        self._encoding = encoding
        self.clear()

    def clear(self):
        """Forget all queued bytes and the Lines they came from."""
        self._buffered_lines: List[Line] = []
        self._buffer = b""

    def pending(self) -> bytes:
        """Return the encoded bytes that are still waiting to be sent."""
        return self._buffer

    def push(self, line: Line):
        """Queue *line*: its formatted text plus CRLF, in this encoder's
        encoding."""
        formatted = line.format()
        encoded = f"{formatted}\r\n".encode(self._encoding)
        self._buffer += encoded
        self._buffered_lines.append(line)

    def pop(self, byte_count: int):
        """Drop the first *byte_count* pending bytes.

        Returns, in push order, one queued Line for every newline byte found
        in the dropped data.
        """
        consumed = self._buffer[:byte_count]
        self._buffer = self._buffer[byte_count:]

        finished = []
        for _ in range(consumed.count(b"\n")):
            finished.append(self._buffered_lines.pop(0))
        return finished
49 | 


--------------------------------------------------------------------------------
/test/hostmask.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import irctokens
 3 | 
 4 | class HostmaskTest(unittest.TestCase):
 5 |     def test_all(self):
 6 |         hostmask = irctokens.hostmask("nick!user@host")
 7 |         self.assertEqual(hostmask.nickname, "nick")
 8 |         self.assertEqual(hostmask.username, "user")
 9 |         self.assertEqual(hostmask.hostname, "host")
10 | 
11 |     def test_no_hostname(self):
12 |         hostmask = irctokens.hostmask("nick!user")
13 |         self.assertEqual(hostmask.nickname, "nick")
14 |         self.assertEqual(hostmask.username, "user")
15 |         self.assertIsNone(hostmask.hostname)
16 | 
17 |     def test_no_ident(self):
18 |         hostmask = irctokens.hostmask("nick@host")
19 |         self.assertEqual(hostmask.nickname, "nick")
20 |         self.assertIsNone(hostmask.username)
21 |         self.assertEqual(hostmask.hostname, "host")
22 | 
23 |     def test_only_nickname(self):
24 |         hostmask = irctokens.hostmask("nick")
25 |         self.assertEqual(hostmask.nickname, "nick")
26 |         self.assertIsNone(hostmask.username)
27 |         self.assertIsNone(hostmask.hostname)
28 | 
29 |     def test_line(self):
30 |         line = irctokens.tokenise(":nick!user@host PRIVMSG #channel hello")
31 |         hostmask = irctokens.hostmask("nick!user@host")
32 |         self.assertEqual(line.hostmask, hostmask)
33 |         self.assertEqual(line.hostmask.nickname, "nick")
34 |         self.assertEqual(line.hostmask.username, "user")
35 |         self.assertEqual(line.hostmask.hostname, "host")
36 | 
37 |     def test_none_source(self):
38 |         line = irctokens.tokenise("PRIVMSG #channel hello")
39 |         def _hostmask():
40 |             line.hostmask
41 |         self.assertRaises(ValueError, _hostmask)
42 | 


--------------------------------------------------------------------------------
/test/stateful_encode.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import irctokens
 3 | 
 4 | class EncodeTestPush(unittest.TestCase):
 5 |     def test(self):
 6 |         e = irctokens.StatefulEncoder()
 7 |         line = irctokens.tokenise("PRIVMSG #channel hello")
 8 |         e.push(line)
 9 |         self.assertEqual(e.pending(), b"PRIVMSG #channel hello\r\n")
10 | 
class EncodeTestPop(unittest.TestCase):
    """pop() trims sent bytes and returns only fully sent lines."""

    def test_partial(self):
        # popping everything but the terminator leaves just the terminator
        encoder = irctokens.StatefulEncoder()
        encoder.push(irctokens.tokenise("PRIVMSG #channel hello"))
        sent = len(b"PRIVMSG #channel hello")
        encoder.pop(sent)
        self.assertEqual(encoder.pending(), b"\r\n")

    def test_returned(self):
        # one full line plus part of the next yields exactly one line
        encoder = irctokens.StatefulEncoder()
        message = irctokens.tokenise("PRIVMSG #channel hello")
        for _ in range(2):
            encoder.push(message)
        sent = len(b"PRIVMSG #channel hello\r\nPRIVMSG")
        popped = encoder.pop(sent)
        self.assertEqual(len(popped), 1)
        self.assertEqual(popped[0], message)

    def test_none_returned(self):
        # a single byte cannot complete a line
        encoder = irctokens.StatefulEncoder()
        encoder.push(irctokens.tokenise("PRIVMSG #channel hello"))
        popped = encoder.pop(1)
        self.assertEqual(len(popped), 0)
34 | 
class EncodeTestClear(unittest.TestCase):
    """clear() empties the pending buffer."""

    def test(self):
        encoder = irctokens.StatefulEncoder()
        message = irctokens.tokenise("PRIVMSG #channel hello")
        encoder.push(message)
        encoder.clear()
        self.assertEqual(encoder.pending(), b"")
41 | 
class EncodeTestEncoding(unittest.TestCase):
    """The encoder honours a non-default encoding."""

    def test(self):
        encoder = irctokens.StatefulEncoder(encoding="iso-8859-2")
        encoder.push(irctokens.tokenise("PRIVMSG #channel :hello Č"))
        expected = "PRIVMSG #channel :hello Č\r\n".encode("iso-8859-2")
        self.assertEqual(encoder.pending(), expected)
48 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # irctokens
 2 | 
 3 | [![Build Status](https://travis-ci.org/jesopo/irctokens.svg?branch=master)](https://travis-ci.org/jesopo/irctokens)
 4 | 
 5 | ## rationale
 6 | 
 7 | there are far too many IRC client implementations out in the world that do not
 8 | tokenise data correctly and thus fall victim to things like colons either being
 9 | where you don't expect them or not being where you expect them.
10 | 
11 | ## usage
12 | 
13 | ### installation
14 | 
15 | `$ pip3 install irctokens`
16 | 
17 | ### tokenisation
18 | ```python
19 | >>> import irctokens
20 | >>> line = irctokens.tokenise(
21 | ...     "@id=123 :jess!~jess@hostname PRIVMSG #chat :hello there!")
22 | >>>
23 | >>> line.tags
24 | {'id': '123'}
25 | >>> line.source
26 | 'jess!~jess@hostname'
27 | >>> line.hostmask
28 | Hostmask(jess!~jess@hostname)
29 | >>> line.command
30 | 'PRIVMSG'
31 | >>> line.params
32 | ['#chat', 'hello there!']
33 | ```
34 | 
35 | ### formatting
36 | 
37 | ```python
38 | >>> irctokens.build("USER", ["user", "0", "*", "real name"]).format()
39 | 'USER user 0 * :real name'
40 | ```
41 | 
42 | ### stateful
43 | 
44 | below is an example of an IRC client connection that copes with partial socket
45 | reads and writes; it connects and joins a channel. both protocol sending and
46 | receiving are handled by irctokens.
47 | 
48 | ```python
49 | 
50 | import irctokens, socket
51 | 
52 | NICK = "nickname"
53 | CHAN = "#channel"
54 | 
55 | d = irctokens.StatefulDecoder()
56 | e = irctokens.StatefulEncoder()
57 | s = socket.socket()
58 | s.connect(("127.0.0.1", 6667))
59 | 
60 | def _send(line):
61 |     print(f"> {line.format()}")
62 |     e.push(line)
63 |     while e.pending():
64 |         e.pop(s.send(e.pending()))
65 | 
66 | _send(irctokens.build("USER", ["username", "0", "*", "real name"]))
67 | _send(irctokens.build("NICK", [NICK]))
68 | 
69 | while True:
70 |     lines = d.push(s.recv(1024))
71 |     if lines is None:
72 |         print("! disconnected")
73 |         break
74 | 
75 |     for line in lines:
76 |         print(f"< {line.format()}")
77 |         if line.command == "PING":
78 |             to_send = irctokens.build("PONG", [line.params[0]])
79 |             _send(to_send)
80 | 
81 |         elif line.command == "001":
82 |             to_send = irctokens.build("JOIN", [CHAN])
83 |             _send(to_send)
84 | ```
85 | 
86 | ## contact
87 | 
88 | Come say hi at `#irctokens` on irc.libera.chat
89 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 


--------------------------------------------------------------------------------
/test/format.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import irctokens
 3 | 
 4 | class FormatTestTags(unittest.TestCase):
 5 |     def test(self):
 6 |         line = irctokens.build("PRIVMSG", ["#channel", "hello"],
 7 |             tags={"id": "\\" + " " + ";" + "\r\n"}).format()
 8 |         self.assertEqual(line, "@id=\\\\\\s\\:\\r\\n PRIVMSG #channel hello")
 9 | 
10 |     def test_missing(self):
11 |         line = irctokens.build("PRIVMSG", ["#channel", "hello"]).format()
12 |         self.assertEqual(line, "PRIVMSG #channel hello")
13 | 
14 |     def test_none_value(self):
15 |         line = irctokens.build("PRIVMSG", ["#channel", "hello"],
16 |             tags={"a": None}).format()
17 |         self.assertEqual(line, "@a PRIVMSG #channel hello")
18 | 
19 |     def test_empty_value(self):
20 |         line = irctokens.build("PRIVMSG", ["#channel", "hello"], tags={"a": ""}
21 |             ).format()
22 |         self.assertEqual(line, "@a PRIVMSG #channel hello")
23 | 
class FormatTestSource(unittest.TestCase):
    """A source is rendered with a leading colon before the command."""

    def test(self):
        built = irctokens.build("PRIVMSG", ["#channel", "hello"],
            source="nick!user@host")
        self.assertEqual(built.format(),
            ":nick!user@host PRIVMSG #channel hello")
29 | 
class FormatTestCommand(unittest.TestCase):
    """Formatting leaves the command's case as given."""

    def test_lowercase(self):
        formatted = irctokens.build("privmsg").format()
        self.assertEqual(formatted, "privmsg")

    def test_uppercase(self):
        formatted = irctokens.build("PRIVMSG").format()
        self.assertEqual(formatted, "PRIVMSG")
37 | 
class FormatTestTrailing(unittest.TestCase):
    """When the last param does or does not get a leading colon."""

    def test_space(self):
        built = irctokens.build("PRIVMSG", ["#channel", "hello world"])
        self.assertEqual(built.format(), "PRIVMSG #channel :hello world")

    def test_no_space(self):
        built = irctokens.build("PRIVMSG", ["#channel", "helloworld"])
        self.assertEqual(built.format(), "PRIVMSG #channel helloworld")

    def test_double_colon(self):
        # a last param that already starts with ":" gains a second one
        built = irctokens.build("PRIVMSG", ["#channel", ":helloworld"])
        self.assertEqual(built.format(), "PRIVMSG #channel ::helloworld")
50 | 
class FormatTestInvalidParam(unittest.TestCase):
    """Non-last params that cannot be represented are rejected."""

    def test_non_last_space(self):
        built = irctokens.build("USER", ["user", "0 *", "real name"])
        with self.assertRaises(ValueError):
            built.format()

    def test_non_last_colon(self):
        built = irctokens.build("PRIVMSG", [":#channel", "hello"])
        with self.assertRaises(ValueError):
            built.format()
61 | 


--------------------------------------------------------------------------------
/test/stateful_decode.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import irctokens
 3 | 
 4 | class DecodeTestPartial(unittest.TestCase):
 5 |     def test(self):
 6 |         d = irctokens.StatefulDecoder()
 7 |         lines = d.push(b"PRIVMSG ")
 8 |         self.assertEqual(lines, [])
 9 | 
10 |         lines = d.push(b"#channel hello\r\n")
11 |         self.assertEqual(len(lines), 1)
12 |         line = irctokens.tokenise("PRIVMSG #channel hello")
13 |         self.assertEqual(lines, [line])
14 | 
class DecodeTestMultiple(unittest.TestCase):
    """Two complete lines in one push come back as two Lines, in order."""

    def test(self):
        decoder = irctokens.StatefulDecoder()
        payload = (b"PRIVMSG #channel1 hello\r\n"
                   b"PRIVMSG #channel2 hello\r\n")
        decoded = decoder.push(payload)
        self.assertEqual(len(decoded), 2)

        expected_first = irctokens.tokenise("PRIVMSG #channel1 hello")
        expected_second = irctokens.tokenise("PRIVMSG #channel2 hello")
        self.assertEqual(decoded[0], expected_first)
        self.assertEqual(decoded[1], expected_second)
26 | 
class DecodeTestEncoding(unittest.TestCase):
    """Decoding with an explicit encoding and with the fallback encoding."""

    def test(self):
        decoder = irctokens.StatefulDecoder(encoding="iso-8859-2")
        payload = "PRIVMSG #channel :hello Č\r\n".encode("iso-8859-2")
        decoded = decoder.push(payload)
        expected = irctokens.tokenise("PRIVMSG #channel :hello Č")
        self.assertEqual(decoded[0], expected)

    def test_fallback(self):
        # latin-1 bytes are not valid utf8, so the fallback must kick in
        decoder = irctokens.StatefulDecoder(fallback="latin-1")
        payload = "PRIVMSG #channel hélló\r\n".encode("latin-1")
        decoded = decoder.push(payload)
        self.assertEqual(len(decoded), 1)
        expected = irctokens.tokenise("PRIVMSG #channel hélló")
        self.assertEqual(decoded[0], expected)
39 | 
class DecodeTestEmpty(unittest.TestCase):
    """Pushing empty data returns None, whatever is already buffered."""

    def test_immediate(self):
        decoder = irctokens.StatefulDecoder()
        result = decoder.push(b"")
        self.assertIsNone(result)

    def test_buffer_unfinished(self):
        decoder = irctokens.StatefulDecoder()
        decoder.push(b"PRIVMSG #channel hello")
        result = decoder.push(b"")
        self.assertIsNone(result)
51 | 
class DecodeTestClear(unittest.TestCase):
    """clear() drops a partially received line."""

    def test(self):
        decoder = irctokens.StatefulDecoder()
        decoder.push(b"PRIVMSG ")
        decoder.clear()
        leftover = decoder.pending()
        self.assertEqual(leftover, b"")
58 | 
class DecodeTestTagEncodingMismatch(unittest.TestCase):
    """utf8 tags and a latin-1 message body on one line both decode."""

    def test(self):
        decoder = irctokens.StatefulDecoder()
        decoder.push("@asd=á ".encode("utf8"))
        decoded = decoder.push("PRIVMSG #chan :á\r\n".encode("latin-1"))

        first = decoded[0]
        self.assertEqual(first.params[1],   "á")
        self.assertEqual(first.tags["asd"], "á")
67 | 


--------------------------------------------------------------------------------
/irctokens/line.py:
--------------------------------------------------------------------------------
  1 | from typing      import Dict, List, Optional, Union
  2 | from .const      import TAG_ESCAPED, TAG_UNESCAPED
  3 | from .hostmask   import Hostmask, hostmask
  4 | from .formatting import format as format_
  5 | 
  6 | class Line(object):
  7 |     def __init__(self,
  8 |             tags:    Optional[Dict[str, str]],
  9 |             source:  Optional[str],
 10 |             command: str,
 11 |             params:  List[str]):
 12 |         self.tags    = tags
 13 |         self.source  = source
 14 |         self.command = command
 15 |         self.params  = params
 16 | 
 17 |     def __eq__(self, other) -> bool:
 18 |         if isinstance(other, Line):
 19 |             return self.format() == other.format()
 20 |         else:
 21 |             return False
 22 |     def __repr__(self) -> str:
 23 |         return (f"Line(tag={self.tags!r}, source={self.source!r}"
 24 |             f", command={self.command!r}, params={self.params!r})")
 25 | 
 26 |     _hostmask: Optional[Hostmask] = None
 27 |     @property
 28 |     def hostmask(self) -> Hostmask:
 29 |         if self.source is not None:
 30 |             if self._hostmask is None:
 31 |                 self._hostmask = hostmask(self.source)
 32 |             return self._hostmask
 33 |         else:
 34 |             raise ValueError("cannot parse hostmask from null source")
 35 | 
 36 |     def format(self) -> str:
 37 |         return format_(self.tags, self.source, self.command, self.params)
 38 | 
 39 |     def with_source(self, source: str) -> "Line":
 40 |         return Line(self.tags, source, self.command, self.params)
 41 |     def copy(self) -> "Line":
 42 |         return Line(self.tags, self.source, self.command, self.params)
 43 | 
 44 | def build(
 45 |         command: str,
 46 |         params:  List[str]=[],
 47 |         source:  Optional[str]=None,
 48 |         tags:    Optional[Dict[str, str]]=None
 49 |         ) -> Line:
 50 |     return Line(tags, source, command, params)
 51 | 
 52 | def _unescape_tag(value: str) -> str:
 53 |     unescaped, escaped = "", list(value)
 54 |     while escaped:
 55 |         current = escaped.pop(0)
 56 |         if current == "\\":
 57 |             if escaped:
 58 |                 next = escaped.pop(0)
 59 |                 duo = current+next
 60 |                 if duo in TAG_ESCAPED:
 61 |                     index = TAG_ESCAPED.index(duo)
 62 |                     unescaped += TAG_UNESCAPED[index]
 63 |                 else:
 64 |                     unescaped += next
 65 |         else:
 66 |             unescaped += current
 67 |     return unescaped
 68 | 
 69 | def _tokenise(line: str) -> Line:
 70 |     tags: Optional[Dict[str, str]] = None
 71 |     if line[0] == "@":
 72 |         tags_s, _, line = line.partition(" ")
 73 |         tags = {}
 74 |         for part in tags_s[1:].split(";"):
 75 |             key, _, value = part.partition("=")
 76 |             tags[key]     = _unescape_tag(value)
 77 | 
 78 |     line, trailing_sep, trailing = line.partition(" :")
 79 |     params = list(filter(bool, line.split(" ")))
 80 | 
 81 |     source: Optional[str] = None
 82 |     if params[0][0] == ":":
 83 |         source = params.pop(0)[1:]
 84 | 
 85 |     if not params:
 86 |         raise ValueError("Cannot tokenise command-less line")
 87 |     command = params.pop(0).upper()
 88 | 
 89 |     if trailing_sep:
 90 |         params.append(trailing)
 91 | 
 92 |     return Line(tags, source, command, params)
 93 | 
 94 | def tokenise(
 95 |         line:     Union[str, bytes],
 96 |         encoding: str="utf8",
 97 |         fallback: str="latin-1"
 98 |         ) -> Line:
 99 | 
100 |     dline: str = ""
101 |     if isinstance(line, bytes):
102 |         if line[0] == ord(b"@"):
103 |             tags_b, sep, line = line.partition(b" ")
104 |             dline += (tags_b+sep).decode("utf8")
105 |         try:
106 |             dline += line.decode(encoding)
107 |         except UnicodeDecodeError:
108 |             dline += line.decode(fallback)
109 |     else:
110 |         dline = line
111 | 
112 |     for badchar in set(dline) & {"\x00", "\r", "\n"}:
113 |         badindex = dline.find(badchar)
114 |         if not badindex == -1:
115 |             # truncate before this bad character
116 |             dline = dline[:badindex]
117 | 
118 |     return _tokenise(dline)
119 | 


--------------------------------------------------------------------------------
/test/tokenise.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | import irctokens
  3 | 
  4 | class TokenTestTags(unittest.TestCase):
  5 |     def test_missing(self):
  6 |         line = irctokens.tokenise("PRIVMSG #channel")
  7 |         self.assertIsNone(line.tags)
  8 | 
  9 |     def test_value_missing(self):
 10 |         line = irctokens.tokenise("@id= PRIVMSG #channel")
 11 |         self.assertEqual(line.tags["id"], "")
 12 | 
 13 |     def test_equal_missing(self):
 14 |         line = irctokens.tokenise("@id PRIVMSG #channel")
 15 |         self.assertEqual(line.tags["id"], "")
 16 | 
 17 |     def test_unescape(self):
 18 |         line = irctokens.tokenise(r"@id=1\\\:\r\n\s2 PRIVMSG #channel")
 19 |         self.assertEqual(line.tags["id"], "1\\;\r\n 2")
 20 | 
 21 |     def test_overlap(self):
 22 |         line = irctokens.tokenise(r"@id=1\\\s\\s PRIVMSG #channel")
 23 |         self.assertEqual(line.tags["id"], "1\\ \\s")
 24 | 
 25 |     def test_lone_end_slash(self):
 26 |         line = irctokens.tokenise("@id=1\\ PRIVMSG #channel")
 27 |         self.assertEqual(line.tags["id"], "1")
 28 | 
 29 | class TokenTestSource(unittest.TestCase):
 30 |     def test_without_tags(self):
 31 |         line = irctokens.tokenise(":nick!user@host PRIVMSG #channel")
 32 |         self.assertEqual(line.source, "nick!user@host")
 33 | 
 34 |     def test_with_tags(self):
 35 |         line = irctokens.tokenise("@id=123 :nick!user@host PRIVMSG #channel")
 36 |         self.assertEqual(line.source, "nick!user@host")
 37 | 
 38 |     def test_missing_without_tags(self):
 39 |         line = irctokens.tokenise("PRIVMSG #channel")
 40 |         self.assertIsNone(line.source)
 41 | 
 42 |     def test_missing_with_tags(self):
 43 |         line = irctokens.tokenise("@id=123 PRIVMSG #channel")
 44 |         self.assertIsNone(line.source)
 45 | 
 46 | class TokenTestCommand(unittest.TestCase):
 47 |     def test_lowercase(self):
 48 |         line = irctokens.tokenise("privmsg #channel")
 49 |         self.assertEqual(line.command, "PRIVMSG")
 50 | 
 51 | class TokenTestParams(unittest.TestCase):
 52 |     def test_trailing(self):
 53 |         line = irctokens.tokenise("PRIVMSG #channel :hello world")
 54 |         self.assertEqual(line.params, ["#channel", "hello world"])
 55 | 
 56 |     def test_only_trailing(self):
 57 |         line = irctokens.tokenise("PRIVMSG :hello world")
 58 |         self.assertEqual(line.params, ["hello world"])
 59 | 
 60 |     def test_no_params(self):
 61 |         line = irctokens.tokenise("PRIVMSG")
 62 |         self.assertEqual(line.command, "PRIVMSG")
 63 |         self.assertEqual(line.params, [])
 64 | 
 65 | class TokenTestAll(unittest.TestCase):
 66 |     def test_all(self):
 67 |         line = irctokens.tokenise(
 68 |             "@id=123 :nick!user@host PRIVMSG #channel :hello world")
 69 |         self.assertEqual(line.tags, {"id": "123"})
 70 |         self.assertEqual(line.source, "nick!user@host")
 71 |         self.assertEqual(line.command, "PRIVMSG")
 72 |         self.assertEqual(line.params, ["#channel", "hello world"])
 73 | 
 74 | class TokenTestTruncate(unittest.TestCase):
 75 |     def test_null(self):
 76 |         line = irctokens.tokenise(
 77 |             ":nick!user@host PRIVMSG #channel :hello\x00 world")
 78 |         self.assertEqual(line.params, ["#channel", "hello"])
 79 | 
 80 |     def test_cr(self):
 81 |         line = irctokens.tokenise(
 82 |             ":nick!user@host PRIVMSG #channel :hello\r world")
 83 |         self.assertEqual(line.params, ["#channel", "hello"])
 84 | 
 85 |     def test_lf(self):
 86 |         line = irctokens.tokenise(
 87 |             ":nick!user@host PRIVMSG #channel :hello\n world")
 88 |         self.assertEqual(line.params, ["#channel", "hello"])
 89 | 
 90 | class TokenTestNoCommand(unittest.TestCase):
 91 |     def test(self):
 92 |         def _test1():
 93 |             line = irctokens.tokenise(":n!u@h")
 94 |         def _test2():
 95 |             line = irctokens.tokenise("@tag=1 :n!u@h")
 96 | 
 97 |         self.assertRaises(ValueError, _test1)
 98 |         self.assertRaises(ValueError, _test2)
 99 | 
class TokenTestBytes(unittest.TestCase):
    """bytes input tokenises to the same result as the equivalent str."""

    def test(self):
        from_str = irctokens.tokenise(
            "@a=1 :n!u@h PRIVMSG #chan :hello word")
        from_bytes = irctokens.tokenise(
            b"@a=1 :n!u@h PRIVMSG #chan :hello word")

        self.assertEqual(from_str, from_bytes)
106 | 


--------------------------------------------------------------------------------
/test/_data/msg-join.yaml:
--------------------------------------------------------------------------------
  1 | # IRC parser tests
  2 | # joining atoms into sendable messages
  3 | 
  4 | # Written in 2015 by Daniel Oaks <daniel@danieloaks.net>
  5 | #
  6 | # To the extent possible under law, the author(s) have dedicated all copyright
  7 | # and related and neighboring rights to this software to the public domain
  8 | # worldwide. This software is distributed without any warranty.
  9 | #
 10 | # You should have received a copy of the CC0 Public Domain Dedication along
 11 | # with this software. If not, see
 12 | # <http://creativecommons.org/publicdomain/zero/1.0/>.
 13 | 
 14 | # some of the tests here originate from grawity's test vectors, which is WTFPL v2 licensed
 15 | #   https://github.com/grawity/code/tree/master/lib/tests
 16 | # some of the tests here originate from Mozilla's test vectors, which is public domain
 17 | #   https://dxr.mozilla.org/comm-central/source/chat/protocols/irc/test/test_ircMessage.js
 18 | # some of the tests here originate from SaberUK's test vectors, which he's indicated I am free to include here
 19 | #   https://github.com/SaberUK/ircparser/tree/master/test
 20 | 
 21 | tests:
 22 |   # the desc string holds a description of the test, if it exists
 23 | 
 24 |   # the atoms dict has the keys:
 25 |   #   * tags: tags dict
 26 |   #       tags with no value are an empty string
 27 |   #   * source: source string, without single leading colon
 28 |   #   * verb: verb string
 29 |   #   * params: params split up as a list
 30 |   # if the params key does not exist, assume it is empty
 31 |   # if any other keys do not exist, assume they are null
 32 |   # a key that is null does not exist or is not specified with the
 33 |   #   given input string
 34 | 
 35 |   # matches is a list of messages that match
 36 | 
 37 |   # simple tests
 38 |   - desc: Simple test with verb and params.
 39 |     atoms:
 40 |       verb: "foo"
 41 |       params:
 42 |         - "bar"
 43 |         - "baz"
 44 |         - "asdf"
 45 |     matches:
 46 |       - "foo bar baz asdf"
 47 |       - "foo bar baz :asdf"
 48 | 
 49 |   # with no regular params
 50 |   - desc: Simple test with source and no params.
 51 |     atoms:
 52 |       source: "src"
 53 |       verb: "AWAY"
 54 |     matches:
 55 |       - ":src AWAY"
 56 | 
 57 |   - desc: Simple test with source and empty trailing param.
 58 |     atoms:
 59 |       source: "src"
 60 |       verb: "AWAY"
 61 |       params:
 62 |         - ""
 63 |     matches:
 64 |       - ":src AWAY :"
 65 | 
 66 |   # with source
 67 |   - desc: Simple test with source.
 68 |     atoms:
 69 |       source: "coolguy"
 70 |       verb: "foo"
 71 |       params:
 72 |         - "bar"
 73 |         - "baz"
 74 |         - "asdf"
 75 |     matches:
 76 |       - ":coolguy foo bar baz asdf"
 77 |       - ":coolguy foo bar baz :asdf"
 78 | 
 79 |   # with trailing param
 80 |   - desc: Simple test with trailing param.
 81 |     atoms:
 82 |       verb: "foo"
 83 |       params:
 84 |         - "bar"
 85 |         - "baz"
 86 |         - "asdf quux"
 87 |     matches:
 88 |       - "foo bar baz :asdf quux"
 89 | 
 90 |   - desc: Simple test with empty trailing param.
 91 |     atoms:
 92 |       verb: "foo"
 93 |       params:
 94 |         - "bar"
 95 |         - "baz"
 96 |         - ""
 97 |     matches:
 98 |       - "foo bar baz :"
 99 | 
100 |   - desc: Simple test with trailing param containing colon.
101 |     atoms:
102 |       verb: "foo"
103 |       params:
104 |         - "bar"
105 |         - "baz"
106 |         - ":asdf"
107 |     matches:
108 |       - "foo bar baz ::asdf"
109 | 
110 |   # with source and trailing param
111 |   - desc: Test with source and trailing param.
112 |     atoms:
113 |       source: "coolguy"
114 |       verb: "foo"
115 |       params:
116 |         - "bar"
117 |         - "baz"
118 |         - "asdf quux"
119 |     matches:
120 |       - ":coolguy foo bar baz :asdf quux"
121 | 
122 |   - desc: Test with trailing containing beginning+end whitespace.
123 |     atoms:
124 |       source: "coolguy"
125 |       verb: "foo"
126 |       params:
127 |         - "bar"
128 |         - "baz"
129 |         - "  asdf quux "
130 |     matches:
131 |       - ":coolguy foo bar baz :  asdf quux "
132 | 
133 |   - desc: Test with trailing containing what looks like another trailing param.
134 |     atoms:
135 |       source: "coolguy"
136 |       verb: "PRIVMSG"
137 |       params:
138 |         - "bar"
139 |         - "lol :) "
140 |     matches:
141 |       - ":coolguy PRIVMSG bar :lol :) "
142 | 
143 |   - desc: Simple test with source and empty trailing.
144 |     atoms:
145 |       source: "coolguy"
146 |       verb: "foo"
147 |       params:
148 |         - "bar"
149 |         - "baz"
150 |         - ""
151 |     matches:
152 |       - ":coolguy foo bar baz :"
153 | 
154 |   - desc: Trailing contains only spaces.
155 |     atoms:
156 |       source: "coolguy"
157 |       verb: "foo"
158 |       params:
159 |         - "bar"
160 |         - "baz"
161 |         - "  "
162 |     matches:
163 |       - ":coolguy foo bar baz :  "
164 | 
165 |   - desc: Param containing tab (tab is not considered SPACE for message splitting).
166 |     atoms:
167 |       source: "coolguy"
168 |       verb: "foo"
169 |       params:
170 |         - "b\tar"
171 |         - "baz"
172 |     matches:
173 |       - ":coolguy foo b\tar baz"
174 |       - ":coolguy foo b\tar :baz"
175 | 
176 |   # with tags
177 |   - desc: Tag with no value and space-filled trailing.
178 |     atoms:
179 |       tags:
180 |         "asd": ""
181 |       source: "coolguy"
182 |       verb: "foo"
183 |       params:
184 |         - "bar"
185 |         - "baz"
186 |         - "  "
187 |     matches:
188 |       - "@asd :coolguy foo bar baz :  "
189 | 
190 |   - desc: Tags with escaped values.
191 |     atoms:
192 |       verb: "foo"
193 |       tags:
194 |         "a": "b\\and\nk"
195 |         "d": "gh;764"
196 |     matches:
197 |       - "@a=b\\\\and\\nk;d=gh\\:764 foo"
198 |       - "@d=gh\\:764;a=b\\\\and\\nk foo"
199 | 
200 |   - desc: Tags with escaped values and params.
201 |     atoms:
202 |       verb: "foo"
203 |       tags:
204 |         "a": "b\\and\nk"
205 |         "d": "gh;764"
206 |       params:
207 |         - "par1"
208 |         - "par2"
209 |     matches:
210 |       - "@a=b\\\\and\\nk;d=gh\\:764 foo par1 par2"
211 |       - "@a=b\\\\and\\nk;d=gh\\:764 foo par1 :par2"
212 |       - "@d=gh\\:764;a=b\\\\and\\nk foo par1 par2"
213 |       - "@d=gh\\:764;a=b\\\\and\\nk foo par1 :par2"
214 | 
215 |   - desc: Tag with long, strange values (including LF and newline).
216 |     atoms:
217 |       tags:
218 |         foo: "\\\\;\\s \r\n"
219 |       verb: "COMMAND"
220 |     matches:
221 |       - "@foo=\\\\\\\\\\:\\\\s\\s\\r\\n COMMAND"
222 | 


--------------------------------------------------------------------------------
/test/_data/msg-split.yaml:
--------------------------------------------------------------------------------
  1 | # IRC parser tests
  2 | # splitting messages into usable atoms
  3 | 
  4 | # Written in 2015 by Daniel Oaks <daniel@danieloaks.net>
  5 | #
  6 | # To the extent possible under law, the author(s) have dedicated all copyright
  7 | # and related and neighboring rights to this software to the public domain
  8 | # worldwide. This software is distributed without any warranty.
  9 | #
 10 | # You should have received a copy of the CC0 Public Domain Dedication along
 11 | # with this software. If not, see
 12 | # <http://creativecommons.org/publicdomain/zero/1.0/>.
 13 | 
 14 | # some of the tests here originate from grawity's test vectors, which is WTFPL v2 licensed
 15 | #   https://github.com/grawity/code/tree/master/lib/tests
 16 | # some of the tests here originate from Mozilla's test vectors, which is public domain
 17 | #   https://dxr.mozilla.org/comm-central/source/chat/protocols/irc/test/test_ircMessage.js
 18 | # some of the tests here originate from SaberUK's test vectors, which he's indicated I am free to include here
 19 | #   https://github.com/SaberUK/ircparser/tree/master/test
 20 | 
 21 | # we follow RFC1459 with regards to multiple ascii spaces splitting atoms:
 22 | #   The prefix, command, and all parameters are
 23 | #   separated by one (or more) ASCII space character(s) (0x20).
 24 | # because doing it as RFC2812 says (strictly as a single ascii space) isn't sane
 25 | 
 26 | tests:
 27 |   # input is the string coming directly from the server to parse
 28 | 
 29 |   # the atoms dict has the keys:
 30 |   #   * tags: tags dict
 31 |   #       tags with no value are an empty string
 32 |   #   * source: source string, without single leading colon
 33 |   #   * verb: verb string
 34 |   #   * params: params split up as a list
 35 |   # if the params key does not exist, assume it is empty
 36 |   # if any other keys do not exist, assume they are null
 37 |   # a key that is null does not exist or is not specified with the
 38 |   #   given input string
 39 | 
 40 |   # simple
 41 |   - input: "foo bar baz asdf"
 42 |     atoms:
 43 |       verb: "foo"
 44 |       params:
 45 |         - "bar"
 46 |         - "baz"
 47 |         - "asdf"
 48 | 
 49 |   # with source
 50 |   - input: ":coolguy foo bar baz asdf"
 51 |     atoms:
 52 |       source: "coolguy"
 53 |       verb: "foo"
 54 |       params:
 55 |         - "bar"
 56 |         - "baz"
 57 |         - "asdf"
 58 | 
 59 |   # with trailing param
 60 |   - input: "foo bar baz :asdf quux"
 61 |     atoms:
 62 |       verb: "foo"
 63 |       params:
 64 |         - "bar"
 65 |         - "baz"
 66 |         - "asdf quux"
 67 | 
 68 |   - input: "foo bar baz :"
 69 |     atoms:
 70 |       verb: "foo"
 71 |       params:
 72 |         - "bar"
 73 |         - "baz"
 74 |         - ""
 75 | 
 76 |   - input: "foo bar baz ::asdf"
 77 |     atoms:
 78 |       verb: "foo"
 79 |       params:
 80 |         - "bar"
 81 |         - "baz"
 82 |         - ":asdf"
 83 | 
 84 |   # with source and trailing param
 85 |   - input: ":coolguy foo bar baz :asdf quux"
 86 |     atoms:
 87 |       source: "coolguy"
 88 |       verb: "foo"
 89 |       params:
 90 |         - "bar"
 91 |         - "baz"
 92 |         - "asdf quux"
 93 | 
 94 |   - input: ":coolguy foo bar baz :  asdf quux "
 95 |     atoms:
 96 |       source: "coolguy"
 97 |       verb: "foo"
 98 |       params:
 99 |         - "bar"
100 |         - "baz"
101 |         - "  asdf quux "
102 | 
103 |   - input: ":coolguy PRIVMSG bar :lol :) "
104 |     atoms:
105 |       source: "coolguy"
106 |       verb: "PRIVMSG"
107 |       params:
108 |         - "bar"
109 |         - "lol :) "
110 | 
111 |   - input: ":coolguy foo bar baz :"
112 |     atoms:
113 |       source: "coolguy"
114 |       verb: "foo"
115 |       params:
116 |         - "bar"
117 |         - "baz"
118 |         - ""
119 | 
120 |   - input: ":coolguy foo bar baz :  "
121 |     atoms:
122 |       source: "coolguy"
123 |       verb: "foo"
124 |       params:
125 |         - "bar"
126 |         - "baz"
127 |         - "  "
128 | 
129 |   # with tags
130 |   - input: "@a=b;c=32;k;rt=ql7 foo"
131 |     atoms:
132 |       verb: "foo"
133 |       tags:
134 |         "a": "b"
135 |         "c": "32"
136 |         "k": ""
137 |         "rt": "ql7"
138 | 
139 |   # with escaped tags
140 |   - input: "@a=b\\\\and\\nk;c=72\\s45;d=gh\\:764 foo"
141 |     atoms:
142 |       verb: "foo"
143 |       tags:
144 |         "a": "b\\and\nk"
145 |         "c": "72 45"
146 |         "d": "gh;764"
147 | 
148 |   # with tags and source
149 |   - input: "@c;h=;a=b :quux ab cd"
150 |     atoms:
151 |       tags:
152 |         "c": ""
153 |         "h": ""
154 |         "a": "b"
155 |       source: "quux"
156 |       verb: "ab"
157 |       params:
158 |         - "cd"
159 | 
160 |   # different forms of last param
161 |   - input: ":src JOIN #chan"
162 |     atoms:
163 |       source: "src"
164 |       verb: "JOIN"
165 |       params:
166 |         - "#chan"
167 | 
168 |   - input: ":src JOIN :#chan"
169 |     atoms:
170 |       source: "src"
171 |       verb: "JOIN"
172 |       params:
173 |         - "#chan"
174 | 
175 |   # with and without last param
176 |   - input: ":src AWAY"
177 |     atoms:
178 |       source: "src"
179 |       verb: "AWAY"
180 | 
181 |   - input: ":src AWAY "
182 |     atoms:
183 |       source: "src"
184 |       verb: "AWAY"
185 | 
186 |   # tab is not considered <SPACE>
187 |   - input: ":cool\tguy foo bar baz"
188 |     atoms:
189 |       source: "cool\tguy"
190 |       verb: "foo"
191 |       params:
192 |         - "bar"
193 |         - "baz"
194 | 
195 |   # with weird control codes in the source
196 |   - input: ":coolguy!ag@net\x035w\x03ork.admin PRIVMSG foo :bar baz"
197 |     atoms:
198 |       source: "coolguy!ag@net\x035w\x03ork.admin"
199 |       verb: "PRIVMSG"
200 |       params:
201 |         - "foo"
202 |         - "bar baz"
203 | 
204 |   - input: ":coolguy!~ag@n\x02et\x0305w\x0fork.admin PRIVMSG foo :bar baz"
205 |     atoms:
206 |       source: "coolguy!~ag@n\x02et\x0305w\x0fork.admin"
207 |       verb: "PRIVMSG"
208 |       params:
209 |         - "foo"
210 |         - "bar baz"
211 | 
212 |   - input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4= :irc.example.com COMMAND param1 param2 :param3 param3"
213 |     atoms:
214 |       tags:
215 |         tag1: "value1"
216 |         tag2: ""
217 |         vendor1/tag3: "value2"
218 |         vendor2/tag4: ""
219 |       source: "irc.example.com"
220 |       verb: "COMMAND"
221 |       params:
222 |         - "param1"
223 |         - "param2"
224 |         - "param3 param3"
225 | 
226 |   - input: ":irc.example.com COMMAND param1 param2 :param3 param3"
227 |     atoms:
228 |       source: "irc.example.com"
229 |       verb: "COMMAND"
230 |       params:
231 |         - "param1"
232 |         - "param2"
233 |         - "param3 param3"
234 | 
235 |   - input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4 COMMAND param1 param2 :param3 param3"
236 |     atoms:
237 |       tags:
238 |         tag1: "value1"
239 |         tag2: ""
240 |         vendor1/tag3: "value2"
241 |         vendor2/tag4: ""
242 |       verb: "COMMAND"
243 |       params:
244 |         - "param1"
245 |         - "param2"
246 |         - "param3 param3"
247 | 
248 |   - input: "COMMAND"
249 |     atoms:
250 |       verb: "COMMAND"
251 | 
252 |   # yaml encoding + slashes is fun
253 |   - input: "@foo=\\\\\\\\\\:\\\\s\\s\\r\\n COMMAND"
254 |     atoms:
255 |       tags:
256 |         foo: "\\\\;\\s \r\n"
257 |       verb: "COMMAND"
258 | 
259 |   # broken messages from unreal
260 |   - input: ":gravel.mozilla.org 432  #momo :Erroneous Nickname: Illegal characters"
261 |     atoms:
262 |       source: "gravel.mozilla.org"
263 |       verb: "432"
264 |       params:
265 |         - "#momo"
266 |         - "Erroneous Nickname: Illegal characters"
267 | 
268 |   - input: ":gravel.mozilla.org MODE #tckk +n "
269 |     atoms:
270 |       source: "gravel.mozilla.org"
271 |       verb: "MODE"
272 |       params:
273 |         - "#tckk"
274 |         - "+n"
275 | 
276 |   - input: ":services.esper.net MODE #foo-bar +o foobar  "
277 |     atoms:
278 |       source: "services.esper.net"
279 |       verb: "MODE"
280 |       params:
281 |         - "#foo-bar"
282 |         - "+o"
283 |         - "foobar"
284 | 
285 |   # tag values should be parsed char-at-a-time to prevent wayward replacements.
286 |   - input: "@tag1=value\\\\ntest COMMAND"
287 |     atoms:
288 |       tags:
289 |         tag1: "value\\ntest"
290 |       verb: "COMMAND"
291 | 
292 |   # If a tag value has a slash followed by a character which doesn't need
293 |   # to be escaped, the slash should be dropped.
294 |   - input: "@tag1=value\\1 COMMAND"
295 |     atoms:
296 |       tags:
297 |         tag1: "value1"
298 |       verb: "COMMAND"
299 | 
300 |   # A slash at the end of a tag value should be dropped
301 |   - input: "@tag1=value1\\ COMMAND"
302 |     atoms:
303 |       tags:
304 |         tag1: "value1"
305 |       verb: "COMMAND"
306 | 
307 |   # Duplicate tags: Parsers SHOULD disregard all but the final occurrence
308 |   - input: "@tag1=1;tag2=3;tag3=4;tag1=5 COMMAND"
309 |     atoms:
310 |       tags:
311 |         tag1: "5"
312 |         tag2: "3"
313 |         tag3: "4"
314 |       verb: "COMMAND"
315 | 
316 |   # vendored tags can have the same name as a non-vendored tag
317 |   - input: "@tag1=1;tag2=3;tag3=4;tag1=5;vendor/tag2=8 COMMAND"
318 |     atoms:
319 |       tags:
320 |         tag1: "5"
321 |         tag2: "3"
322 |         tag3: "4"
323 |         vendor/tag2: "8"
324 |       verb: "COMMAND"
325 | 
326 |   # Some parsers handle /MODE in a special way, make sure they do it right
327 |   - input: ":SomeOp MODE #channel :+i"
328 |     atoms:
329 |       source: "SomeOp"
330 |       verb: "MODE"
331 |       params:
332 |       - "#channel"
333 |       - "+i"
334 | 
335 |   - input: ":SomeOp MODE #channel +oo SomeUser :AnotherUser"
336 |     atoms:
337 |       source: "SomeOp"
338 |       verb: "MODE"
339 |       params:
340 |       - "#channel"
341 |       - "+oo"
342 |       - "SomeUser"
343 |       - "AnotherUser"
344 | 


--------------------------------------------------------------------------------