├── ps_stream
│   ├── __init__.py
│   ├── cli
│   │   ├── __init__.py
│   │   └── main.py
│   ├── publisher.yml
│   ├── utils.py
│   ├── collector.py
│   └── publisher.py
├── requirements.txt
├── NOTICE
├── Dockerfile
├── setup.py
├── .gitignore
├── docker-compose.yml
└── README.md

/ps_stream/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/ps_stream/cli/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
-e .
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
This product includes derivations of software developed by
Docker, Inc., used under the Apache License Version 2.0.
--------------------------------------------------------------------------------
/ps_stream/publisher.yml:
--------------------------------------------------------------------------------
message_keys:
  PROJECT: '{PROJECT_ID}'
  PROJ_ACTIVITY: '{PROJECT_ID}'
  CURRENCY_CD_TBL: '{CURRENCY_CD}'
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM ucalgary/python-librdkafka:3.7.0-0.11.6

RUN mkdir -p /usr/src/app
WORKDIR /usr/src/app

COPY setup.py /usr/src/app
COPY ps_stream /usr/src/app/ps_stream
RUN apk add --no-cache --virtual .build-deps \
        gcc \
        git \
        musl-dev && \
    python setup.py install && \
    apk del .build-deps

ENTRYPOINT ["/usr/local/bin/ps-stream"]
CMD ["--help"]

LABEL maintainer="King Chung Huang" \
      org.label-schema.vcs-url="https://github.com/ucalgary/ps-stream"
--------------------------------------------------------------------------------
/ps_stream/utils.py:
--------------------------------------------------------------------------------
def element_text(element):
    value = element.text
    if value:
        value = value.strip()  # str.strip() returns a new string; assign it back
    return value


def element_to_obj(element, map_class=dict, value_f=element_text, wrap_value=True):
    value = None

    if len(element) > 0:
        child_values = map(lambda e: (e.tag.split('}', 1)[-1], element_to_obj(
            e, map_class=map_class, value_f=value_f, wrap_value=False)), element)
        value = map_class(child_values)
    else:
        value = value_f(element)

    if wrap_value:
        value = {element.tag.split('}', 1)[-1]: value}
    return value
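

# Illustrative example (not part of the original module): element_to_obj turns a
# rowset fragment into nested dicts, dropping any XML namespace prefix from tags.
#
#   >>> from xml.etree import ElementTree
#   >>> e = ElementTree.fromstring('<PROJECT><PROJECT_ID> 42 </PROJECT_ID></PROJECT>')
#   >>> element_to_obj(e)
#   {'PROJECT': {'PROJECT_ID': '42'}}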
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pkg_resources
from setuptools import find_packages
from setuptools import setup


install_requires = [
    'docopt==0.6.2',
    'Twisted==16.6.0',
    'PyYAML==3.12',
    'pytz==2016.10',
    'confluent-kafka==0.11.5',
    'ujson==1.35',
    'docopt_utils==0.0.0'
]

dependency_links = [
    'https://github.com/ucalgary/docopt-utils/archive/master.zip#egg=docopt_utils-0.0.0',
]


setup(
    name='ps_stream',
    description='Process PeopleSoft sync messages into logical streams',
    author='King Chung Huang',
    packages=find_packages(),
    package_data={
        '': ['*.yml']
    },
    install_requires=install_requires,
    dependency_links=dependency_links,
    entry_points="""
    [console_scripts]
    ps-stream=ps_stream.cli.main:main
    """,
    zip_safe=True
)
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
.venv/
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
version: '3'

services:
  zookeeper:
    image: confluentinc/cp-zookeeper:3.3.0
    hostname: zookeeper
    environment:
      - ZOOKEEPER_CLIENT_PORT=2181
    networks:
      - streaming

  kafka:
    image: confluentinc/cp-kafka:3.3.0
    hostname: kafka
    environment:
      - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
      - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://kafka:9092
    networks:
      - streaming
    depends_on:
      - zookeeper

  kafka-rest:
    image: confluentinc/cp-kafka-rest:3.3.0
    hostname: kafka-rest
    environment:
      - ACCESS_CONTROL_ALLOW_ORIGIN_DEFAULT="*"
      - KAFKA_REST_SCHEMA_REGISTRY_URL=http://schema-registry:8081
      - KAFKA_REST_ZOOKEEPER_CONNECT=zookeeper:2181
      - KAFKA_REST_HOST_NAME=kafka-rest
      - KAFKA_REST_LISTENERS=http://kafka-rest:8082
      - KAFKA_REST_BOOTSTRAP_SERVERS=PLAINTEXT://kafka:9092
    networks:
      - streaming
    depends_on:
      - zookeeper
      - kafka

  schema-registry:
    image: confluentinc/cp-schema-registry:3.3.0
    hostname: schema-registry
    environment:
      - SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
      - SCHEMA_REGISTRY_HOST_NAME=schema-registry
      - SCHEMA_REGISTRY_LISTENERS=http://schema-registry:8081
    networks:
      - streaming
    depends_on:
      - zookeeper
      - kafka

  ps-collector:
    image: ucalgary/ps-stream
    command:
      - collect
    ports:
      - 8000:8000
    networks:
      - streaming

  ps-publisher:
    image: ucalgary/ps-stream
    command:
      - publish
    networks:
      - streaming

  kafka-topics-ui:
    image: landoop/kafka-topics-ui:0.9.2
    environment:
      - KAFKA_REST_PROXY_URL=http://kafka-rest:8082
      - PROXY=true
    ports:
      - 8001:8000
    networks:
      - streaming

networks:
  streaming:
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Process PeopleSoft Sync Messages into Kafka Topics

[![](https://images.microbadger.com/badges/image/ucalgary/ps-stream.svg)](https://microbadger.com/images/ucalgary/ps-stream) [![Anchore Image Overview](https://anchore.io/service/badges/image/a26f2562a708b063d8bf1e0f685f0b2bc75bde1725a787588d1be531b23f06ff)](https://anchore.io/image/dockerhub/a26f2562a708b063d8bf1e0f685f0b2bc75bde1725a787588d1be531b23f06ff?repo=ucalgary%2Fps-stream&tag=latest)

`ps-stream` is a Python utility that collects and parses [PeopleSoft rowset-based messages](http://docs.oracle.com/cd/E66686_01/pt855pbr1/eng/pt/tibr/concept_PeopleSoftRowset-BasedMessageFormat-0764fb.html) generated by sync and fullsync services into Kafka messages and topics. PeopleSoft sync processes are normally used to synchronize data between PeopleSoft applications, but they can also be used to generate an externalized stream of PeopleSoft objects for streaming data pipelines.

There are two major commands in `ps-stream`.

**`collect`** accepts PeopleSoft rowset-based messages over HTTP or HTTPS and produces a Kafka message for each transaction in the PeopleSoft message, stored in one or more Kafka topics.

**`publish`** consumes the transaction messages stored by `collect` and produces Kafka messages in topics with `KTable` semantics: each record in the resulting stream is keyed by the primary key or identifier of the underlying PeopleSoft record.

## Running a ps-stream container

Collect PeopleSoft sync messages.

```
$ docker run -p 8000:8000 -d ucalgary/ps-stream collect
```

## Test Drive

You can quickly deploy ps-stream on Play with Docker, a community-run Docker playground, by clicking the following button.

[![Try in PWD](https://cdn.rawgit.com/play-with-docker/stacks/cff22438/assets/images/button.png)](http://play-with-docker.com?stack=https://raw.githubusercontent.com/ucalgary/ps-stream/master/docker-compose.yml&stack_name=ps-stream)

After starting the demo, try `POST`ing a PeopleSoft rowset-based message to the collector on port 8000, then use kafka-topics-ui on port 8001 to see the message in a Kafka topic.
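
The sketch below shows one way to exercise the collector locally. The payload is a deliberately minimal, made-up rowset-style document (real PeopleSoft messages carry more fields and PSCAMA metadata), and it assumes the `requests` package is installed and a collector is listening on port 8000.

```python
import requests

body = """<?xml version="1.0"?>
<PROJECT_FULLSYNC>
  <FieldTypes>
    <PROJECT class="R">
      <PROJECT_ID type="CHAR"/>
    </PROJECT>
  </FieldTypes>
  <MsgData>
    <Transaction>
      <PROJECT class="R">
        <PROJECT_ID>000000000042</PROJECT_ID>
      </PROJECT>
      <PSCAMA class="R">
        <AUDIT_ACTN>A</AUDIT_ACTN>
      </PSCAMA>
    </Transaction>
  </MsgData>
</PROJECT_FULLSYNC>"""

response = requests.post(
    'http://localhost:8000/',
    data=body.encode('utf-8'),
    headers={
        'TransactionID': 'demo-0001',
        'OrigTimeStamp': '2017-09-01T12:00:00.000000-0600',
        'DataChunk': '1',
        'DataChunkCount': '1',
        'MessageName': 'PROJECT_FULLSYNC',
    })
print(response.text)  # {"status":"POST ok"}
```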

## Maintenance

This repository and image are currently maintained by the Research Management Systems project at the [University of Calgary](http://www.ucalgary.ca/).
--------------------------------------------------------------------------------
/ps_stream/cli/main.py:
--------------------------------------------------------------------------------
import logging
import sys

from .. import collector
from .. import publisher

from docopt_utils.dispatcher import dispatch


log = logging.getLogger(__name__)


def main():
    def set_logging_level(handler, options):
        logging.basicConfig(level=logging.DEBUG if options['--verbose'] else logging.INFO)

    command_classes = {'__root__': PSStreamCommand}
    dispatch(command_classes, env='PSSTREAM', before_f=set_logging_level)


class PSStreamCommand(object):
    """Process PeopleSoft sync messages into Kafka topics.

    Usage:
      ps-stream [--kafka=HOSTS]...
                [--verbose]
                [COMMAND] [ARGS...]
      ps-stream -h|--help

    Options:
      -k, --kafka HOSTS    Kafka bootstrap hosts [default: kafka:9092]
      --verbose            Show more output

    Commands:
      collect    Collect PeopleSoft sync messages
      config     Validate and view the collector config
      publish    Parse transaction messages into record streams
    """

    def collect(self, options):
        """Collect PeopleSoft sync and fullsync messages.

        Usage: collect [--port=PORT] [--target-prefix=PREFIX] [--target-topic=TOPIC]
                       [--accept-from=NAMES]...
                       [--accept-to=NAMES]...
                       [--accept-messagename=NAMES]...

        Options:
          --port PORT                  Port to listen to messages on [default: 8000]
          --accept-from NAMES          Accepted values for the From header
          --accept-to NAMES            Accepted values for the To header
          --accept-messagename NAMES   Accepted values for the MessageName header
          --target-prefix PREFIX       Prefix name for target topic [default: ps]
          --target-topic TOPIC         Topic to write transactions to [default: transactions]
        """
        config = kafka_config_from_options(options)

        collector.collect(
            config,
            topic=prefix_topics(options['--target-prefix'], options['--target-topic']),
            port=int(options['--port']),
            senders=options['--accept-from'],
            recipients=options['--accept-to'],
            message_names=options['--accept-messagename'])
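
    # Illustrative invocation (all flags are defined in the docstrings above):
    #
    #   ps-stream --kafka broker1:9092 collect --accept-messagename PROJECT_FULLSYNC
    #
    # accepts only messages whose MessageName header is PROJECT_FULLSYNC and writes
    # each transaction to the ps.transactions topic on broker1.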

    def config(self, options):
        """Validate and view the collector config.

        Usage: config
        """
        pass

    def publish(self, options):
        """Parse transaction messages into record streams.

        Usage: publish [--source-prefix=PREFIX] [--source-topic=NAME]...
                       [--target-prefix=PREFIX] [--target-topic=NAME]
                       [options]

        Options:
          --source-prefix PREFIX     Prefix string for source topics [default: ps]
          --source-topic NAME        Topics to consume transactions from [default: transactions]
          --target-prefix PREFIX     Prefix name for target topics [default: ps]
          --target-topic NAME        Topic to write records to, defaults to the record type
          --consumer-group GROUP     Kafka consumer group name [default: ps-stream]
        """
        config = kafka_config_from_options(options)

        publisher.publish(
            config,
            source_topics=prefix_topics(options['--source-prefix'], options['--source-topic']),
            target_topic=prefix_topics(options['--target-prefix'], options['--target-topic']),
            target_prefix=options['--target-prefix'])


def kafka_config_from_options(options):
    config = dict()

    if '--kafka' in options:
        config['bootstrap.servers'] = ','.join(options['--kafka'])
    if '--consumer-group' in options:
        config['group.id'] = options['--consumer-group']

    return config


def prefix_topics(prefix, topics):
    if not topics:
        return topics
    if prefix:
        if not isinstance(topics, str):
            return [f'{prefix}.{topic}' for topic in topics]
        else:
            return f'{prefix}.{topics}'
    return topics
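

# Illustrative behaviour of prefix_topics (not part of the original module):
#
#   prefix_topics('ps', 'transactions')    -> 'ps.transactions'
#   prefix_topics('ps', ['transactions'])  -> ['ps.transactions']
#   prefix_topics(None, 'transactions')    -> 'transactions'
#   prefix_topics('ps', None)              -> None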
--------------------------------------------------------------------------------
/ps_stream/collector.py:
--------------------------------------------------------------------------------
import logging
import pytz
from datetime import datetime
from xml.etree import ElementTree

import ujson as json
from confluent_kafka import Producer
from twisted.internet import endpoints, reactor
from twisted.web import resource, server

from .utils import element_to_obj


log = logging.getLogger(__name__)


class PSStreamCollector(resource.Resource):

    isLeaf = True

    def __init__(self, producer, topic=None, authorize_f=None):
        super().__init__()
        self.producer = producer
        self.topic = topic
        self.authorize_f = authorize_f

    def render_GET(self, request):
        return '{"status":"GET ok"}'.encode('utf-8')

    def render_POST(self, request):
        """Decode PeopleSoft rowset-based messages into transactions, and produce Kafka
        messages for each transaction. PeopleSoft is expected to POST messages as events
        occur via SYNC and FULLSYNC services.

        The following URL describes the PeopleSoft Rowset-Based Message Format.
        http://docs.oracle.com/cd/E66686_01/pt855pbr1/eng/pt/tibr/concept_PeopleSoftRowset-BasedMessageFormat-0764fb.html
        """
        if self.authorize_f and not self.authorize_f(request):
            request.setResponseCode(403, message='Forbidden')
            log.info('Unauthorized message received')
            log.debug('To: {}, From: {}, MessageName: {}'.format(
                request.getHeader('To'),
                request.getHeader('From'),
                request.getHeader('MessageName')))
            return 'Message not accepted by collector.'.encode('utf-8')

        assert(request.getHeader('DataChunk') == '1')
        assert(request.getHeader('DataChunkCount') == '1')

        psft_message_name = None
        field_types = None

        transaction_id = request.getHeader('TransactionID')
        orig_time_stamp = request.getHeader('OrigTimeStamp')

        # Parse the root element for the PeopleSoft message name and FieldTypes
        request.content.seek(0, 0)
        for event, e in ElementTree.iterparse(request.content, events=('start', 'end')):
            if event == 'start' and psft_message_name is None:
                psft_message_name = e.tag.split('}', 1)[-1]
            elif event == 'end' and e.tag.split('}', 1)[-1] == 'FieldTypes':
                field_types = element_to_obj(e, value_f=field_type)
                break

        # Rescan for transactions, removing read elements to reduce memory usage
        transaction_index = 1
        request.content.seek(0, 0)
        for event, e in ElementTree.iterparse(request.content, events=('end',)):
            if e.tag.split('}', 1)[-1] == 'Transaction':
                transaction = ElementTree.tostring(e, encoding='unicode')
                message = {
                    'TransactionID': transaction_id,
                    'TransactionIndex': transaction_index,
                    'OrigTimeStamp': orig_time_stamp,
                    'CollectTimeStamp': datetime.now(pytz.utc).astimezone().isoformat(),
                    'Transaction': transaction
                }
                message_str = json.dumps(message)
                self.producer.produce(self.topic, message_str, transaction_id)
                e.clear()
                transaction_index += 1
        self.producer.flush()

        return '{"status":"POST ok"}'.encode('utf-8')


def collect(config, topic=None, port=8000, senders=None, recipients=None, message_names=None):
    def authorize_request(request):
        # Each accept list guards its own header: senders filters From, recipients
        # filters To, and message_names filters MessageName.
        if senders and request.getHeader('From') not in senders:
            return False
        if recipients and request.getHeader('To') not in recipients:
            return False
        if message_names and request.getHeader('MessageName') not in message_names:
            return False
        return True

    producer = Producer(config)
    collector = PSStreamCollector(producer, topic=topic, authorize_f=authorize_request)
    site = server.Site(collector)
    endpoint = endpoints.TCP4ServerEndpoint(reactor, int(port))
    endpoint.listen(site)
    log.info(f'Listening for connections on port {port}')
    reactor.run()


def field_type(element):
    assert('type' in element.attrib)
    return element.attrib.get('type')
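

# Shape of the JSON value produced to Kafka for each transaction, with illustrative
# values; the message key is the TransactionID header and the Transaction field
# carries the raw XML fragment:
#
#   {
#     "TransactionID": "demo-0001",
#     "TransactionIndex": 1,
#     "OrigTimeStamp": "2017-09-01T12:00:00.000000-0600",
#     "CollectTimeStamp": "2017-09-01T12:00:05-06:00",
#     "Transaction": "<Transaction>...</Transaction>"
#   }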
--------------------------------------------------------------------------------
/ps_stream/publisher.py:
--------------------------------------------------------------------------------
import logging
import pkg_resources
import signal
import sys
from difflib import SequenceMatcher
from xml.etree import ElementTree

import ujson as json
import yaml
from confluent_kafka import Consumer, Producer
from confluent_kafka import KafkaError

from .utils import element_to_obj


log = logging.getLogger(__name__)


key_formats_by_record_type = yaml.load(
    pkg_resources.resource_stream(__name__, 'publisher.yml'))['message_keys']


class PSStreamPublisher(object):

    def __init__(self, consumer, producer,
                 source_topics=None, target_topic=None, target_prefix=None):
        super().__init__()
        self.consumer = consumer
        self.producer = producer
        self.source_topics = source_topics
        self.target_topic = target_topic
        self.target_prefix = target_prefix
        self.running = True

    def run(self):
        '''Process transactions from the source topics and publish
        messages representing a stream of PeopleSoft rows organized
        by record name.
        '''
        signal.signal(signal.SIGINT, self.terminate)
        signal.signal(signal.SIGTERM, self.terminate)

        self.consumer.subscribe(self.source_topics)

        while self.running:
            message = self.consumer.poll(timeout=5)

            if not message:
                continue
            elif not message.error():
                transaction = json.loads(message.value().decode('utf-8'))

                for topic, key, value in self.messages_from_transaction(transaction):
                    self.producer.produce(topic, value, key)
            elif message.error().code() != KafkaError._PARTITION_EOF:
                print(message.error())
                self.running = False

        self.terminate()

    def terminate(self, signum=None, frame=None):
        # Accept the (signum, frame) arguments passed by the signal module so this
        # method can double as a signal handler.
        log.info('Terminating')
        self.consumer.close()
        self.producer.flush()
        sys.exit(0)

    def messages_from_transaction(self, transaction, key_serde=json.dumps, value_serde=json.dumps):
        transaction['Transaction'] = element_to_obj(
            ElementTree.fromstring(transaction['Transaction']), wrap_value=False)

        audit_actn = transaction['Transaction']['PSCAMA']['AUDIT_ACTN']
        if audit_actn is not None and audit_actn not in ('A', 'C', 'D', 'K', 'N', 'O'):
            log.info('Invalid AUDIT_ACTN received')
            log.debug(transaction)
            return

        for record_type, record_data in transaction['Transaction'].items():
            if record_type == 'PSCAMA':
                continue
            topic = self.topic_for_record(record_type, record_data)
            key = self.key_for_record(record_type, record_data)
            value = audit_actn in (None, 'A', 'C') and record_data or None
            log.debug(f'Producing to topic {topic} with key {key}')
            if key and key_serde:
                key = key_serde(key)
            if value and value_serde:
                value = value_serde(value)
            yield topic, key, value

    def topic_for_record(self, record_type, record_data):
        if self.target_topic:
            return self.target_topic
        elif self.target_prefix:
            return f'{self.target_prefix}.{record_type}'
        return record_type

    def key_for_record(self, record_type, record_data, guess=False):
        key_format = key_formats_by_record_type.get(record_type, None)
        if not key_format and guess:
            keys = record_data.keys()
            keys = sorted(keys,
                          key=lambda x: SequenceMatcher(a=record_type, b=x).ratio(),
                          reverse=True)
            key_attribute = keys[0]
            key_format = '{%s}' % key_attribute
            key_formats_by_record_type[record_type] = key_format
        return key_format and key_format.format(**record_data)
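

# Illustrative key construction (record type and values are made up):
#
#   key_formats_by_record_type['PROJECT'] == '{PROJECT_ID}'
#   key_for_record('PROJECT', {'PROJECT_ID': '000001', 'DESCR': 'Example'}) -> '000001'
#
# For record types missing from publisher.yml, key_for_record(..., guess=True) falls
# back to the field whose name is most similar to the record type (ranked with
# difflib.SequenceMatcher) and caches a new format string for it.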


def publish(config, source_topics=None, target_topic=None, target_prefix=None):
    consumer_config = {**config, **{
        'default.topic.config': {
            'auto.offset.reset': 'smallest',
            'auto.commit.interval.ms': 5000
        }
    }}
    producer_config = config
    consumer = Consumer(consumer_config)
    producer = Producer(producer_config)
    publisher = PSStreamPublisher(
        consumer, producer,
        source_topics=source_topics, target_topic=target_topic, target_prefix=target_prefix)
    log.info(f'Reading transactions from {source_topics}')
    publisher.run()
--------------------------------------------------------------------------------