├── .project
├── README.md
├── apache-real-time
├── flume.conf
├── morphline.conf
├── schema.xml
└── solrconfig.xml
├── email-search
├── email-schema.xml
├── flume.config
├── indexer-config.xml
├── ks-indexer-email-morphlines.conf
├── morphlines.conf
└── schema.xml
└── ocr
├── IdmpExtraction.scala
├── indexer-config.xml
├── morphlines.conf
└── schema.xml
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | cloudera-search
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | cloudera-search
2 | ===============
3 | This repository has all you need to set up Cloudera Search in your environment
4 |
5 | The directories are broken down by Use Cases
6 |
--------------------------------------------------------------------------------
/apache-real-time/flume.conf:
--------------------------------------------------------------------------------
1 | # Please paste flume.conf here. Example:
2 |
3 | # Sources, channels, and sinks are defined per
4 | # agent name, in this case 'tier1'.
5 | tier1.sources = source1
6 | tier1.channels = channel1
7 | tier1.sinks = sink1
8 |
9 | # For each source, channel, and sink, set
10 | # standard properties.
11 |
12 |
13 |
14 | # Syslog Source Configuration
15 | tier1.sources.source1.type = syslogtcp
16 | # the hostname that Flume Syslog source will be running on
17 | tier1.sources.source1.host = localhost
18 | # the port that Flume Syslog source will listen on
19 | tier1.sources.source1.port = 5040
20 | tier1.sources.source1.channels = channel1
21 |
22 |
23 | # Solr Sink configuration
24 | tier1.sinks.sink1.type = org.apache.flume.sink.solr.morphline.MorphlineSolrSink
25 | tier1.sinks.sink1.morphlineFile = /tmp/morphline.conf
26 | tier1.sinks.sink1.channel = channel1
27 |
28 |
29 |
30 | # Kafka Channel Configuration
31 | tier1.channels.channel1.type = org.apache.flume.channel.kafka.KafkaChannel
32 | tier1.channels.channel1.capacity = 10000
33 | tier1.channels.channel1.transactionCapacity = 1000
34 | tier1.channels.channel1.brokerList = kafkaf-2:9092,kafkaf-3:9092
35 | tier1.channels.channel1.topic = channel1
36 | tier1.channels.channel1.zookeeperConnect = kafkaf-1:2181
37 | tier1.channels.channel1.groupId = flume2
38 |
--------------------------------------------------------------------------------
/apache-real-time/morphline.conf:
--------------------------------------------------------------------------------
1 | # Specify server locations in a SOLR_LOCATOR variable;
2 | # used later in variable substitutions
3 | # Change the zkHost to point to your own Zookeeper quorum
4 | SOLR_LOCATOR : {
5 | # Name of solr collection
6 | collection : accessCollection
7 | # ZooKeeper ensemble
8 | zkHost : "localhost:2181/solr"
9 | }
10 |
11 | # Specify an array of one or more morphlines, each of which defines an ETL
12 | # transformation chain. A morphline consists of one or more (potentially
13 | # nested) commands. A morphline is a way to consume records (e.g. Flume events,
14 | # HDFS files or blocks), turn them into a stream of records, and pipe the stream
15 | # of records through a set of easily configurable transformations on its way to
16 | # Solr (or a MapReduceIndexerTool RecordWriter that feeds via a Reducer into Solr).
17 | morphlines : [
18 | {
19 | # Name used to identify a morphline. E.g. used if there are multiple morphlines in a
20 | # morphline config file
21 | id : morphline1
22 | # Import all morphline commands in these java packages and their subpackages.
23 | # Other commands that may be present on the classpath are not visible to this morphline.
24 | importCommands : ["org.kitesdk.**", "org.apache.solr.**"]
25 | commands : [
26 | {
27 | ## Read the email stream and break it up into individual messages.
28 | ## The beginning of a message is marked by regex clause below
29 | ## The reason we use this command is that one event can have multiple
30 | ## messages
31 | readCSV {
32 | separator: " "
33 | columns: [client_ip,C1,C2,time,dummy1,request,code,bytes,referer,user_agent,C3]
34 | ignoreFirstLine : false
35 | quoteChar : "\""
36 | commentPrefix : ""
37 | trim : true
38 | charset : UTF-8
39 | }
40 | }
41 | {
42 | split {
43 | inputField : request
44 | outputFields : [method, url, protocol]
45 | separator : " "
46 | isRegex : false
47 | #separator : """\s*,\s*"""
48 | # #isRegex : true
49 | addEmptyStrings : false
50 | trim : true
51 | }
52 | }
53 | {
54 | split {
55 | inputField : url
56 | outputFields : ["", app, subapp]
57 | separator : "\/"
58 | isRegex : false
59 | #separator : """\s*,\s*"""
60 | # #isRegex : true
61 | addEmptyStrings : false
62 | trim : true
63 | }
64 | }
65 | {
66 | userAgent {
67 | inputField : user_agent
68 | outputFields : {
69 | user_agent_family : "@{ua_family}"
70 | user_agent_major : "@{ua_major}"
71 | device_family : "@{device_family}"
72 | os_family : "@{os_family}"
73 | os_major : "@{os_major}"
74 | }
75 | }
76 | }
77 | {
78 | #Extract GEO information
79 | geoIP {
80 | inputField : client_ip
81 | database : "/tmp/GeoLite2-City.mmdb"
82 | }
83 | }
84 | {
85 |
86 | # extract parts of the geolocation info from the Jackson JsonNode Java
87 | # # object contained in the _attachment_body field and store the parts in
88 | # # the given record output fields:
89 | extractJsonPaths {
90 | flatten : false
91 | paths : {
92 | country_code : /country/iso_code
93 | country_name : /country/names/en
94 | region_code : /continent/code
95 | #"/subdivisions[]/names/en" : "/subdivisions[]/names/en"
96 | #"/subdivisions[]/iso_code" : "/subdivisions[]/iso_code"
97 | city : /city/names/en
98 | #/postal/code : /postal/code
99 | latitude : /location/latitude
100 | longitude : /location/longitude
101 | #/location/latitude_longitude : /location/latitude_longitude
102 | #/location/longitude_latitude : /location/longitude_latitude
103 | }
104 | }
105 | }
106 | #{logInfo { format : "BODY : {}", args : ["@{}"] } }
107 | # add Unique ID, in case our message_id field from above is not present
108 | {
109 | generateUUID {
110 | field:id
111 | }
112 | }
113 |
114 | # convert the timestamp field to "yyyy-MM-dd'T'HH:mm:ss.SSSZ" format
115 | {
116 | # 21/Nov/2014:22:08:27
117 | convertTimestamp {
118 | field : time
119 | inputFormats : ["[dd/MMM/yyyy:HH:mm:ss", "EEE, d MMM yyyy HH:mm:ss Z", "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", "yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd"]
120 | inputTimezone : America/Los_Angeles
121 | outputFormat : "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"
122 | outputTimezone : UTC
123 | }
124 | }
125 |
126 | # Consume the output record of the previous command and pipe another
127 | # record downstream.
128 | #
129 | # This command sanitizes record fields that are unknown to Solr schema.xml
130 | # by deleting them. Recall that Solr throws an exception on any attempt to
131 | # load a document that contains a field that isn't specified in schema.xml
132 | {
133 | sanitizeUnknownSolrFields {
134 | # Location from which to fetch Solr schema
135 | solrLocator : ${SOLR_LOCATOR}
136 | }
137 | }
138 |
139 | # load the record into a SolrServer or MapReduce SolrOutputFormat.
140 | {
141 | loadSolr {
142 | solrLocator : ${SOLR_LOCATOR}
143 | }
144 | }
145 | ]
146 | }
147 | ]
148 |
149 |
150 |
--------------------------------------------------------------------------------
/apache-real-time/schema.xml:
--------------------------------------------------------------------------------
1 |
2 |
18 |
19 |
47 |
48 |
49 |
65 |
66 |
67 |
101 |
102 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
149 | id
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
184 |
185 |
191 |
192 |
195 |
196 |
197 |
198 |
205 |
206 |
210 |
211 |
212 |
213 |
214 |
215 |
230 |
231 |
237 |
238 |
239 |
240 |
241 |
242 |
252 |
253 |
254 |
255 |
256 |
257 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
314 |
315 |
316 |
327 |
328 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
343 |
348 |
349 |
350 |
351 |
352 |
355 |
356 |
357 |
358 |
359 |
360 |
361 |
362 |
363 |
364 |
365 |
370 |
371 |
372 |
373 |
376 |
380 |
385 |
386 |
387 |
388 |
391 |
392 |
393 |
394 |
395 |
396 |
401 |
402 |
403 |
404 |
407 |
408 |
409 |
410 |
411 |
420 |
421 |
422 |
423 |
426 |
430 |
435 |
436 |
437 |
438 |
439 |
440 |
441 |
442 |
443 |
448 |
449 |
450 |
451 |
452 |
453 |
454 |
455 |
457 |
458 |
459 |
460 |
461 |
462 |
463 |
464 |
465 |
466 |
468 |
469 |
470 |
471 |
472 |
474 |
475 |
476 |
477 |
478 |
479 |
481 |
482 |
483 |
484 |
485 |
486 |
487 |
488 |
489 |
490 |
491 |
499 |
500 |
504 |
505 |
506 |
509 |
510 |
513 |
514 |
515 |
516 |
527 |
530 |
531 |
532 |
533 |
534 |
535 |
536 |
537 |
538 |
539 |
540 |
541 |
542 |
543 |
554 |
555 |
556 |
557 |
558 |
559 |
560 |
561 |
562 |
563 |
564 |
565 |
566 |
570 |
571 |
572 |
573 |
574 |
575 |
576 |
577 |
578 |
582 |
583 |
584 |
585 |
586 |
587 |
588 |
589 |
590 |
591 |
593 |
594 |
595 |
606 |
607 |
608 |
609 |
610 |
611 |
615 |
617 |
618 |
629 |
630 |
631 |
632 |
633 |
634 |
635 |
636 |
637 |
638 |
639 |
640 |
641 |
642 |
643 |
644 |
645 |
646 |
647 |
648 |
649 |
650 |
651 |
652 |
653 |
654 |
655 |
656 |
657 |
658 |
659 |
660 |
661 |
662 |
663 |
664 |
665 |
666 |
667 |
668 |
669 |
670 |
671 |
672 |
673 |
674 |
675 |
676 |
677 |
678 |
679 |
680 |
681 |
682 |
683 |
684 |
685 |
686 |
687 |
688 |
689 |
690 |
691 |
692 |
693 |
694 |
695 |
696 |
697 |
698 |
699 |
700 |
701 |
702 |
703 |
704 |
705 |
706 |
707 |
708 |
709 |
710 |
711 |
712 |
713 |
714 |
715 |
716 |
717 |
718 |
719 |
720 |
721 |
722 |
723 |
724 |
725 |
726 |
727 |
728 |
729 |
730 |
731 |
732 |
733 |
734 |
735 |
736 |
737 |
738 |
739 |
740 |
741 |
742 |
743 |
744 |
745 |
746 |
747 |
748 |
749 |
750 |
751 |
752 |
753 |
754 |
755 |
756 |
757 |
758 |
759 |
760 |
761 |
762 |
763 |
764 |
765 |
766 |
767 |
768 |
769 |
770 |
771 |
772 |
773 |
774 |
775 |
776 |
777 |
778 |
779 |
780 |
781 |
782 |
783 |
784 |
785 |
786 |
787 |
788 |
789 |
790 |
791 |
792 |
793 |
794 |
795 |
796 |
797 |
798 |
799 |
800 |
801 |
802 |
803 |
804 |
805 |
806 |
807 |
808 |
809 |
810 |
811 |
812 |
813 |
814 |
815 |
816 |
817 |
818 |
819 |
820 |
821 |
822 |
823 |
824 |
825 |
826 |
827 |
828 |
829 |
830 |
831 |
832 |
833 |
834 |
835 |
836 |
837 |
838 |
839 |
840 |
841 |
842 |
843 |
844 |
845 |
846 |
847 |
848 |
849 |
850 |
851 |
852 |
853 |
854 |
855 |
856 |
857 |
858 |
859 |
860 |
861 |
862 |
863 |
864 |
865 |
866 |
867 |
868 |
869 |
870 |
871 |
872 |
873 |
874 |
875 |
876 |
877 |
878 |
879 |
880 |
881 |
882 |
883 |
884 |
885 |
886 |
887 |
888 |
889 |
890 |
891 |
892 |
893 |
894 |
895 |
896 |
897 |
898 |
899 |
900 |
901 |
902 |
903 |
904 |
905 |
906 |
907 |
908 |
909 |
910 |
911 |
912 |
913 |
914 |
915 |
916 |
917 |
918 |
919 |
920 |
921 |
922 |
923 |
924 |
925 |
926 |
927 |
928 |
929 |
930 |
931 |
932 |
933 |
934 |
935 |
936 |
937 |
938 |
939 |
940 |
941 |
942 |
943 |
944 |
945 |
946 |
947 |
948 |
949 |
950 |
951 |
952 |
953 |
954 |
955 |
956 |
957 |
958 |
959 |
960 |
961 |
962 |
963 |
964 |
965 |
966 |
967 |
968 |
969 |
974 |
979 |
980 |
981 |
--------------------------------------------------------------------------------
/email-search/email-schema.xml:
--------------------------------------------------------------------------------
1 |
29 |
30 |
31 |
47 |
48 |
49 |
83 |
84 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
117 | id
118 |
119 |
120 |
126 |
127 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
158 |
159 |
163 |
164 |
165 |
166 |
167 |
168 |
183 |
184 |
190 |
191 |
192 |
193 |
194 |
195 |
205 |
206 |
207 |
208 |
209 |
210 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
267 |
268 |
269 |
280 |
281 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
301 |
302 |
303 |
304 |
305 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
347 |
348 |
353 |
354 |
355 |
356 |
359 |
363 |
368 |
369 |
370 |
371 |
374 |
375 |
376 |
377 |
378 |
379 |
384 |
385 |
386 |
387 |
390 |
391 |
392 |
393 |
394 |
403 |
404 |
405 |
406 |
409 |
413 |
418 |
419 |
420 |
421 |
422 |
423 |
424 |
425 |
426 |
431 |
432 |
433 |
434 |
435 |
436 |
437 |
438 |
440 |
441 |
442 |
443 |
444 |
445 |
446 |
447 |
448 |
449 |
451 |
452 |
453 |
454 |
455 |
457 |
458 |
459 |
460 |
461 |
462 |
464 |
465 |
466 |
467 |
468 |
469 |
470 |
471 |
472 |
473 |
474 |
482 |
483 |
487 |
488 |
489 |
492 |
493 |
496 |
497 |
498 |
499 |
510 |
513 |
514 |
515 |
516 |
517 |
518 |
519 |
520 |
521 |
522 |
523 |
524 |
525 |
526 |
537 |
538 |
539 |
540 |
541 |
542 |
543 |
544 |
545 |
546 |
547 |
548 |
549 |
553 |
554 |
555 |
556 |
557 |
558 |
559 |
560 |
561 |
565 |
566 |
567 |
568 |
569 |
570 |
571 |
572 |
573 |
574 |
576 |
577 |
578 |
589 |
590 |
591 |
592 |
593 |
594 |
598 |
600 |
601 |
612 |
614 |
615 |
616 |
617 |
618 |
619 |
620 |
621 |
622 |
623 |
624 |
625 |
626 |
627 |
628 |
629 |
630 |
631 |
632 |
633 |
634 |
635 |
636 |
637 |
638 |
639 |
640 |
641 |
642 |
643 |
644 |
645 |
646 |
647 |
648 |
649 |
650 |
651 |
652 |
653 |
654 |
655 |
656 |
657 |
658 |
659 |
660 |
661 |
662 |
663 |
664 |
665 |
666 |
667 |
668 |
669 |
670 |
671 |
672 |
673 |
674 |
675 |
676 |
677 |
678 |
679 |
680 |
681 |
682 |
683 |
684 |
685 |
686 |
687 |
688 |
689 |
690 |
691 |
692 |
693 |
694 |
695 |
696 |
697 |
698 |
699 |
700 |
701 |
702 |
703 |
704 |
705 |
706 |
707 |
708 |
709 |
710 |
711 |
712 |
713 |
714 |
715 |
716 |
717 |
718 |
719 |
720 |
721 |
722 |
723 |
724 |
725 |
726 |
727 |
728 |
729 |
730 |
731 |
732 |
733 |
734 |
735 |
736 |
737 |
738 |
739 |
740 |
741 |
742 |
743 |
744 |
745 |
746 |
747 |
748 |
749 |
750 |
751 |
752 |
753 |
754 |
755 |
756 |
757 |
758 |
759 |
760 |
761 |
762 |
763 |
764 |
765 |
766 |
767 |
768 |
769 |
770 |
771 |
772 |
773 |
774 |
775 |
776 |
777 |
778 |
779 |
780 |
781 |
782 |
783 |
784 |
785 |
786 |
787 |
788 |
789 |
790 |
791 |
792 |
793 |
794 |
795 |
796 |
797 |
798 |
799 |
800 |
801 |
802 |
803 |
804 |
805 |
806 |
807 |
808 |
809 |
810 |
811 |
812 |
813 |
814 |
815 |
816 |
817 |
818 |
819 |
820 |
821 |
822 |
823 |
824 |
825 |
826 |
827 |
828 |
829 |
830 |
831 |
832 |
833 |
834 |
835 |
836 |
837 |
838 |
839 |
840 |
841 |
842 |
843 |
844 |
845 |
846 |
847 |
848 |
849 |
850 |
851 |
852 |
853 |
859 |
860 |
861 |
889 |
890 |
891 |
892 |
893 |
894 |
895 |
896 |
897 |
898 |
899 |
900 |
901 |
902 |
903 |
904 |
905 |
906 |
907 |
908 |
909 |
910 |
911 |
912 |
913 |
914 |
915 |
916 |
917 |
918 |
919 |
920 |
921 |
922 |
923 |
924 |
925 |
926 |
927 |
928 |
929 |
930 |
931 |
932 |
933 |
934 |
935 |
936 |
937 |
938 |
939 |
940 |
941 |
942 |
943 |
944 |
945 |
946 |
947 |
948 |
949 |
950 |
951 |
952 |
953 |
954 |
955 |
956 |
957 |
958 |
959 |
960 |
961 |
962 |
963 |
964 |
965 |
966 |
967 |
968 |
969 |
970 |
971 |
972 |
973 |
974 |
975 |
976 |
977 |
978 |
979 |
980 |
981 |
982 |
983 |
984 |
985 |
986 |
987 |
988 |
989 |
990 |
991 |
992 |
993 |
994 |
995 |
996 |
997 |
998 |
999 |
1000 |
1001 |
1002 |
1003 |
1004 |
1005 |
1006 |
1011 |
1016 |
1017 |
1018 |
--------------------------------------------------------------------------------
/email-search/flume.config:
--------------------------------------------------------------------------------
1 | # Please paste flume.conf here. Example:
2 |
3 | # Sources, channels, and sinks are defined per
4 | # agent name, in this case 'tier1'.
5 | tier1.sources=emailsSrc
6 | tier1.channels=emailsChannel solrChannel
7 | tier1.sinks=emailsSink solrSink
8 |
9 |
10 | # For each source, channel, and sink, set
11 | # standard properties.
12 | #tier1.sources.emailsSrc.command=tail -F /tmp/emails.txt
13 | #tier1.sources.emailsSrc.type=exec
14 | #tier1.sources.emailsSrc.command=cat /tmp/emails/emails_sample.txt
15 | tier1.sources.emailsSrc.type=spooldir
16 | tier1.sources.emailsSrc.spoolDir =/tmp/emails/spool
17 | tier1.sources.emailsSrc.deserializer = org.apache.flume.sink.solr.morphline.BlobDeserializer$Builder
18 | tier1.sources.emailsSrc.deletePolicy=immediate
19 | tier1.sources.emailsSrc.channels=emailsChannel solrChannel
20 | tier1.sources.emailsSrc.selector.type=replicating
21 |
22 |
23 | tier1.channels.emailsChannel.type=memory
24 | tier1.channels.solrChannel.type=memory
25 |
26 |
27 | # HDFS Sink
28 | tier1.sinks.emailsSink.type=hdfs
29 | tier1.sinks.emailsSink.hdfs.path=/user/flume
30 | tier1.sinks.emailsSink.hdfs.filePrefix=email_log
31 | tier1.sinks.emailsSink.hdfs.rollSize=102400000
32 | tier1.sinks.emailsSink.hdfs.batchSize=10000
33 | tier1.sinks.emailsSink.hdfs.rollCount=0
34 | tier1.sinks.emailsSink.hdfs.minBlockReplicas=1
35 | tier1.sinks.emailsSink.hdfs.txnEventMax=10000
36 | tier1.sinks.emailsSink.hdfs.callTimeout=1000000
37 | tier1.sinks.emailsSink.channel=emailsChannel
38 |
39 | # SOLR Sink
40 | tier1.sinks.solrSink.type=org.apache.flume.sink.solr.morphline.MorphlineSolrSink
41 | tier1.sinks.solrSink.channel=solrChannel
42 | tier1.sinks.solrSink.morphlineFile=/tmp/morphline.conf
43 |
44 |
45 | # Other properties are specific to each type of
46 | # source, channel, or sink. In this case, we
47 | # specify the capacity of the memory channel.
48 | tier1.channels.emailsChannel.capacity=100000000
49 | tier1.channels.emailsChannel.transactionCapacity=10000000
50 |
51 | tier1.channels.solrChannel.capacity=100000000
52 | tier1.channels.solrChannel.transactionCapacity=10000000
53 |
54 |
--------------------------------------------------------------------------------
/email-search/indexer-config.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/email-search/ks-indexer-email-morphlines.conf:
--------------------------------------------------------------------------------
1 | SOLR_LOCATOR : {
2 | # Name of solr collection
3 | collection : collection1
4 |
5 | # ZooKeeper ensemble
6 | zkHost : "$ZK_HOST"
7 | }
8 |
9 |
10 | morphlines : [
11 | {
12 | id : morphline1
13 | importCommands : ["com.cloudera.**", "com.ngdata.**"]
14 |
15 | commands : [
16 | {
17 | extractHBaseCells {
18 | mappings : [
19 | {
20 | inputColumn : "messages:*"
21 | outputField : "message"
22 | type : string
23 | source : value
24 | }
25 | ]
26 | }
27 | }
28 | ## Break up the email text into SOLR fields
29 | {
30 | if {
31 | conditions: [
32 | {
33 | not{
34 | grok {
35 | expressions : {
36 | message: """(?s)(.*?)(Message-ID: <)(?(.+?))(>.*?)(Date: )(?(.+?))( \(.+?)(From: )(?(.*?))((To: )(?(.+?)))?(Subject: )(?(.*?))((Cc: )(?(.*)))?(Mime.+?)((Bcc: )(?(.*)))?(X-From: )(?(.*?))(X-To: )(?(.*?))(X-cc: )(?(.*?))(X-bcc: )(?(.*?))(X-Folder: )(?(.*?))(X-Origin: )(?(.*?))(X-FileName: )(?(.*?))(\n)(?(.*))"""
37 | }
38 | extract: inplace
39 | findSubstrings: false
40 | addEmptyStrings: false
41 | numRequiredMatches: all
42 | }
43 | }
44 | }
45 | ]
46 | then:[
47 | { logInfo { format : "found no grok match: {}", args : ["@{}"] } }
48 | { dropRecord {} }
49 | ]
50 | }
51 | }
52 |
53 | # add Unique ID, in case our message_id field from above is not present
54 | {
55 | generateUUID {
56 | field:message_id
57 | }
58 | }
59 |
60 | # convert the timestamp field to "yyyy-MM-dd'T'HH:mm:ss.SSSZ" format
61 | {
62 | convertTimestamp {
63 | field : date
64 | inputFormats : ["EEE, d MMM yyyy HH:mm:ss Z", "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", "yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd"]
65 | inputTimezone : America/Los_Angeles
66 | outputFormat : "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"
67 | outputTimezone : UTC
68 | }
69 | }
70 |
71 | # Consume the output record of the previous command and pipe another
72 | # record downstream.
73 | #
74 | # This command sanitizes record fields that are unknown to Solr schema.xml
75 | # by deleting them. Recall that Solr throws an exception on any attempt to
76 | # load a document that contains a field that isn't specified in schema.xml
77 | {
78 | sanitizeUnknownSolrFields {
79 | # Location from which to fetch Solr schema
80 | solrLocator : ${SOLR_LOCATOR}
81 | }
82 | }
83 | ]
84 | }
85 | ]
86 |
--------------------------------------------------------------------------------
/email-search/morphlines.conf:
--------------------------------------------------------------------------------
1 | # Specify server locations in a SOLR_LOCATOR variable;
2 | # used later in variable substitutions
3 | # Change the zkHost to point to your own Zookeeper quorum
4 | SOLR_LOCATOR : {
5 | # Name of solr collection
6 | collection : email_collection
7 | # ZooKeeper ensemble
8 | zkHost : "clust2:2181/solr"
9 | }
10 |
11 | # Specify an array of one or more morphlines, each of which defines an ETL
12 | # transformation chain. A morphline consists of one or more (potentially
13 | # nested) commands. A morphline is a way to consume records (e.g. Flume events,
14 | # HDFS files or blocks), turn them into a stream of records, and pipe the stream
15 | # of records through a set of easily configurable transformations on its way to
16 | # Solr (or a MapReduceIndexerTool RecordWriter that feeds via a Reducer into Solr).
17 | morphlines : [
18 | {
19 | # Name used to identify a morphline. E.g. used if there are multiple morphlines in a
20 | # morphline config file
21 | id : morphline1
22 | # Import all morphline commands in these java packages and their subpackages.
23 | # Other commands that may be present on the classpath are not visible to this morphline.
24 | importCommands : ["com.cloudera.**", "org.apache.solr.**"]
25 | commands : [
26 | {
27 | ## Read the email stream and break it up into individual messages.
28 | ## The beginning of a message is marked by regex clause below
29 | ## The reason we use this command is that one event can have multiple
30 | ## messages
31 | readMultiLine {
32 | regex : "Message-ID:.*"
33 | what : next
34 | charset : UTF-8
35 | }
36 | }
37 | ## Break up the email text into SOLR fields
38 | {
39 | if {
40 | conditions: [
41 | {
42 | not{
43 | grok {
44 | expressions : {
45 | message: """(?s)(.*?)(Message-ID: <)(?(.+?))(>.*?)(Date: )(?(.+?))( \(.+?)(From: )(?(.*?))((To: )(?(.+?)))?(Subject: )(?(.*?))((Cc: )(?(.*)))?(Mime.+?)((Bcc: )(?(.*)))?(X-From: )(?(.*?))(X-To: )(?(.*?))(X-cc: )(?(.*?))(X-bcc: )(?(.*?))(X-Folder: )(?(.*?))(X-Origin: )(?(.*?))(X-FileName: )(?(.*?))(\n)(?(.*))"""
46 | }
47 | extract: inplace
48 | findSubstrings: false
49 | addEmptyStrings: false
50 | numRequiredMatches: all
51 | }
52 | }
53 | }
54 | ]
55 | then:[
56 | { logInfo { format : "found no grok match: {}", args : ["@{}"] } }
57 | { dropRecord {} }
58 | ]
59 | }
60 | }
61 |
62 | # add Unique ID, in case our message_id field from above is not present
63 | {
64 | generateUUID {
65 | field:message_id
66 | }
67 | }
68 |
69 | # convert the timestamp field to "yyyy-MM-dd'T'HH:mm:ss.SSSZ" format
70 | {
71 | convertTimestamp {
72 | field : date
73 | inputFormats : ["EEE, d MMM yyyy HH:mm:ss Z", "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", "yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd"]
74 | inputTimezone : America/Los_Angeles
75 | outputFormat : "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"
76 | outputTimezone : UTC
77 | }
78 | }
79 |
80 | # Consume the output record of the previous command and pipe another
81 | # record downstream.
82 | #
83 | # This command sanitizes record fields that are unknown to Solr schema.xml
84 | # by deleting them. Recall that Solr throws an exception on any attempt to
85 | # load a document that contains a field that isn't specified in schema.xml
86 | {
87 | sanitizeUnknownSolrFields {
88 | # Location from which to fetch Solr schema
89 | solrLocator : ${SOLR_LOCATOR}
90 | }
91 | }
92 |
93 | # load the record into a SolrServer or MapReduce SolrOutputFormat.
94 | {
95 | loadSolr {
96 | solrLocator : ${SOLR_LOCATOR}
97 | }
98 | }
99 | ]
100 | }
101 | ]
102 |
--------------------------------------------------------------------------------
/email-search/schema.xml:
--------------------------------------------------------------------------------
1 |
2 |
30 |
31 |
32 |
48 |
49 |
50 |
84 |
85 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
117 | message_id
118 |
119 |
120 |
126 |
127 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
157 |
158 |
162 |
163 |
164 |
165 |
166 |
167 |
182 |
183 |
189 |
190 |
191 |
192 |
193 |
194 |
204 |
205 |
206 |
207 |
208 |
209 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
266 |
267 |
268 |
279 |
280 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
300 |
301 |
302 |
303 |
304 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
346 |
347 |
352 |
353 |
354 |
355 |
358 |
362 |
367 |
368 |
369 |
370 |
373 |
374 |
375 |
376 |
377 |
378 |
383 |
384 |
385 |
386 |
389 |
390 |
391 |
392 |
393 |
402 |
403 |
404 |
405 |
408 |
412 |
417 |
418 |
419 |
420 |
421 |
422 |
423 |
424 |
425 |
430 |
431 |
432 |
433 |
434 |
435 |
436 |
437 |
439 |
440 |
441 |
442 |
443 |
444 |
445 |
446 |
447 |
448 |
450 |
451 |
452 |
453 |
454 |
456 |
457 |
458 |
459 |
460 |
461 |
463 |
464 |
465 |
466 |
467 |
468 |
469 |
470 |
471 |
472 |
473 |
481 |
482 |
486 |
487 |
488 |
491 |
492 |
495 |
496 |
497 |
498 |
509 |
512 |
513 |
514 |
515 |
516 |
517 |
518 |
519 |
520 |
521 |
522 |
523 |
524 |
525 |
536 |
537 |
538 |
539 |
540 |
541 |
542 |
543 |
544 |
545 |
546 |
547 |
548 |
552 |
553 |
554 |
555 |
556 |
557 |
558 |
559 |
560 |
564 |
565 |
566 |
567 |
568 |
569 |
570 |
571 |
572 |
573 |
575 |
576 |
577 |
588 |
589 |
590 |
591 |
592 |
593 |
597 |
599 |
600 |
611 |
613 |
614 |
615 |
616 |
617 |
618 |
619 |
620 |
621 |
622 |
623 |
624 |
625 |
626 |
627 |
628 |
629 |
630 |
631 |
632 |
633 |
634 |
635 |
636 |
637 |
638 |
639 |
640 |
641 |
642 |
643 |
644 |
645 |
646 |
647 |
648 |
649 |
650 |
651 |
652 |
653 |
654 |
655 |
656 |
657 |
658 |
659 |
660 |
661 |
662 |
663 |
664 |
665 |
666 |
667 |
668 |
669 |
670 |
671 |
672 |
673 |
674 |
675 |
676 |
677 |
678 |
679 |
680 |
681 |
682 |
683 |
684 |
685 |
686 |
687 |
688 |
689 |
690 |
691 |
692 |
693 |
694 |
695 |
696 |
697 |
698 |
699 |
700 |
701 |
702 |
703 |
704 |
705 |
706 |
707 |
708 |
709 |
710 |
711 |
712 |
713 |
714 |
715 |
716 |
717 |
718 |
719 |
720 |
721 |
722 |
723 |
724 |
725 |
726 |
727 |
728 |
729 |
730 |
731 |
732 |
733 |
734 |
735 |
736 |
737 |
738 |
739 |
740 |
741 |
742 |
743 |
744 |
745 |
746 |
747 |
748 |
749 |
750 |
751 |
752 |
753 |
754 |
755 |
756 |
757 |
758 |
759 |
760 |
761 |
762 |
763 |
764 |
765 |
766 |
767 |
768 |
769 |
770 |
771 |
772 |
773 |
774 |
775 |
776 |
777 |
778 |
779 |
780 |
781 |
782 |
783 |
784 |
785 |
786 |
787 |
788 |
789 |
790 |
791 |
792 |
793 |
794 |
795 |
796 |
797 |
798 |
799 |
800 |
801 |
802 |
803 |
804 |
805 |
806 |
807 |
808 |
809 |
810 |
811 |
812 |
813 |
814 |
815 |
816 |
817 |
818 |
819 |
820 |
821 |
822 |
823 |
824 |
825 |
826 |
827 |
828 |
829 |
830 |
831 |
832 |
833 |
834 |
835 |
836 |
837 |
838 |
839 |
840 |
841 |
842 |
843 |
844 |
845 |
846 |
847 |
848 |
849 |
850 |
851 |
852 |
858 |
859 |
860 |
888 |
889 |
890 |
891 |
892 |
893 |
894 |
895 |
896 |
897 |
898 |
899 |
900 |
901 |
902 |
903 |
904 |
905 |
906 |
907 |
908 |
909 |
910 |
911 |
912 |
913 |
914 |
915 |
916 |
917 |
918 |
919 |
920 |
921 |
922 |
923 |
924 |
925 |
926 |
927 |
928 |
929 |
930 |
931 |
932 |
933 |
934 |
935 |
936 |
937 |
938 |
939 |
940 |
941 |
942 |
943 |
944 |
945 |
946 |
947 |
948 |
949 |
950 |
951 |
952 |
953 |
954 |
955 |
956 |
957 |
958 |
959 |
960 |
961 |
962 |
963 |
964 |
965 |
966 |
967 |
968 |
969 |
970 |
971 |
972 |
973 |
974 |
975 |
976 |
977 |
978 |
979 |
980 |
981 |
982 |
983 |
984 |
985 |
986 |
987 |
988 |
989 |
990 |
991 |
992 |
993 |
994 |
995 |
996 |
997 |
998 |
999 |
1000 |
1001 |
1002 |
1003 |
1004 |
1005 |
1010 |
1015 |
1016 |
1017 |
--------------------------------------------------------------------------------
/ocr/IdmpExtraction.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * @author Jeff Shmain
3 | * @author Vartika Singh
4 | */
5 |
6 | package com.cloudera.sa.OCR
7 |
8 | import org.bytedeco.javacpp.lept._;
9 | import org.bytedeco.javacpp.tesseract._;
10 | import java.io.ByteArrayOutputStream;
11 | import java.nio.ByteBuffer;
12 | import java.nio.ByteBuffer
13 | import java.awt.Image
14 | import java.awt.image.RenderedImage;
15 | import java.io.File;
16 | import scala.collection.mutable.StringBuilder
17 | import collection.JavaConversions._
18 | import java.io.IOException;
19 | import java.util.List;
20 | import javax.imageio.ImageIO;
21 | import org.ghost4j.analyzer.AnalysisItem;
22 | import org.ghost4j.analyzer.FontAnalyzer;
23 | import org.ghost4j.document.PDFDocument;
24 | import org.ghost4j.renderer.SimpleRenderer;
25 | import org.bytedeco.javacpp._;
26 | import java.io._
27 | import org.apache.spark.{Logging, SerializableWritable, SparkConf, SparkContext}
28 | import org.apache.hadoop.hbase.HBaseConfiguration
29 | import org.apache.hadoop.hbase.client.Connection
30 | import org.apache.hadoop.hbase.client.ConnectionFactory
31 | import org.apache.hadoop.hbase.TableName
32 | import org.apache.hadoop.hbase.client.Put
33 | import org.apache.hadoop.hbase.client.Get
34 | import org.apache.hadoop.hbase.util.Bytes
35 |
36 | object IdmpExtraction {
37 | def main(args: Array[String]) {
38 |
39 |
40 | val conf = new SparkConf().setAppName("IDMP Processor")
41 | val sc = new SparkContext(conf)
42 |
43 | /** Read in PDFs into the RDD */
44 | val files = sc.binaryFiles ("hdfs://nameservice1/data/raw")   // (path, PortableDataStream) pair per file under the raw-data dir
45 | files.map(convertFunc(_)).count   // count() forces evaluation so convertFunc actually runs for every file
46 | }
47 |
48 |
49 | /** Populate the HBase table
50 | * @param fileName This corresponds to the rowID in HBase
51 | * @param lines The parsed output.
52 | * dataformat: binaryPDF:PortableDataStream
53 | */
54 | def populateHbase (
55 | fileName:String,   // used as the HBase row key
56 | lines: String,   // extracted text of one document
57 | pdf:org.apache.spark.input.PortableDataStream) : Unit =
58 | {
59 | /** Configure and open a HBase connection */
60 | val conf = HBaseConfiguration.create()
61 | val conn= ConnectionFactory.createConnection( conf );   // NOTE(review): a new connection per record is expensive — consider reusing one per partition
62 | val mddsTbl = conn.getTable( TableName.valueOf( "mdds" ));
63 | val cf = "info"   // column family holding all extracted metadata fields
64 | val put = new Put( Bytes.toBytes( fileName ))   // one Put accumulates every column for this document
65 |
66 | /**
67 | * Extract Fields here using Regexes
68 | * Create Put objects and send to hbase
69 | */
70 | val aAndCP = """(?s)(?m).*\d\d\d\d\d-\d\d\d\d(.*)\nRe: (\w\d\d\d\d\d\d).*""".r   // captures the address block after a ZIP+4 and the "Re:" case number (letter + six digits)
71 | val approvedP = """(?s)(?m).*(You may, therefore, market the device, subject to the general controls provisions of the Act).*""".r   // approval boilerplate sentence
72 |
73 | lines match {
74 | case
75 | aAndCP( addr, casenum ) => put.add( Bytes.toBytes( cf ), Bytes.toBytes( "submitter_info" ), Bytes.toBytes( addr ) ).add( Bytes.toBytes( cf ), Bytes.toBytes( "case_num" ), Bytes.toBytes( casenum ))
76 | case _ => println( "did not match a regex" )
77 | }
78 |
79 | lines match {
80 | case
81 | approvedP( approved ) => put.add( Bytes.toBytes( cf ), Bytes.toBytes( "approved" ), Bytes.toBytes( "yes" ))
82 | case _ => put.add( Bytes.toBytes( cf ), Bytes.toBytes( "approved" ), Bytes.toBytes( "no" ))
83 | }
84 |
85 | lines.split("\n").foreach {   // NOTE(review): `_ match` after val definitions inside this block is unusual Scala — verify it compiles and is applied per line as intended; also the regexes are re-created on every line
86 |
87 | val regNumRegex = """Regulation Number:\s+(.+)""".r
88 | val regNameRegex = """Regulation Name:\s+(.+)""".r
89 | val regClassRegex = """Regulatory Class:\s+(.+)""".r
90 | val productCodeRegex = """Product Code:\s+(.+)""".r
91 | val datedRegex = """Dated:\s+(\w{3,10}\s+\d{1,2},\s+\d{4}).*""".r
92 | val receivedRegex = """Received:\s+(\w{3,10}\s+\d{1,2},\s+\d{4}).*""".r
93 | val deviceNameRegex = """Trade/Device Name:\s+(.+)""".r
94 |
95 | _ match {
96 | case regNumRegex( regNum ) => put.add( Bytes.toBytes( cf ), Bytes.toBytes( "reg_num" ), Bytes.toBytes( regNum ))
97 | case regNameRegex(regName) => put.add(Bytes.toBytes( cf ), Bytes.toBytes( "reg_name" ), Bytes.toBytes( regName ))
98 | case regClassRegex( regClass ) => put.add( Bytes.toBytes( cf ), Bytes.toBytes( "reg_class" ), Bytes.toBytes( regClass ))
99 | case productCodeRegex( productCode ) => put.add( Bytes.toBytes( cf ), Bytes.toBytes( "product_code" ), Bytes.toBytes( productCode ))
100 | case datedRegex( dated ) => put.add( Bytes.toBytes( cf ), Bytes.toBytes( "dated" ), Bytes.toBytes( dated ))
101 | case receivedRegex( received ) => put.add( Bytes.toBytes( cf ), Bytes.toBytes( "received" ), Bytes.toBytes( received ))
102 | case deviceNameRegex( deviceName ) => put.add( Bytes.toBytes( cf ), Bytes.toBytes( "device_name" ), Bytes.toBytes( deviceName ))
103 |
104 | case _ => print( "" )   // no-op: line carried no field of interest
105 | }
106 | }
107 | put.add( Bytes.toBytes( cf ), Bytes.toBytes( "text" ), Bytes.toBytes( lines ))   // store the full extracted text alongside the parsed fields
108 | val pdfBytes = pdf.toArray.clone
109 | put.add(Bytes.toBytes( "obj" ), Bytes.toBytes( "pdf" ), pdfBytes )   // raw PDF bytes under the separate "obj" column family
110 |
111 | mddsTbl.put( put )   // single round trip writes all accumulated columns
112 | mddsTbl.close
113 | conn.close
114 | }
115 |
116 | /** Method to convert a PDF document to images and hence OCR
117 | * @param PDF File(s) to process
118 | */
119 | def convertFunc (
120 | file: (String, org.apache.spark.input.PortableDataStream)
121 | ) : Unit =
122 | {
123 | /** Render the PDF into a list of images with 300 dpi resolution
124 | * One image per PDF page, a PDF document may have multiple pages
125 | */
126 | val document: PDFDocument = new PDFDocument( );
127 | document.load( file._2.open )
128 | file._2.close
129 | val renderer :SimpleRenderer = new SimpleRenderer( )
130 | renderer.setResolution( 300 )
131 | val images:List[Image] = renderer.render( document )
132 |
133 | /** Iterate through the image list and extract OCR
134 | * using Tesseract API.
135 | */
136 | var r:StringBuilder = new StringBuilder
137 | images.toList.foreach{ x=>
138 | val imageByteStream = new ByteArrayOutputStream( )
139 | ImageIO.write(
140 | x.asInstanceOf[RenderedImage], "png", imageByteStream )
141 | val pix: PIX = pixReadMem(
142 | ByteBuffer.wrap( imageByteStream.toByteArray( ) ).array( ),
143 | ByteBuffer.wrap( imageByteStream.toByteArray( ) ).capacity( )
144 | )
145 | val api: TessBaseAPI = new TessBaseAPI( )
146 | /** We assume the documents are in English here, hence \”eng\” */
147 | api.Init( null, "eng" )
148 | api.SetImage(pix)
149 | r.append(api.GetUTF8Text().getString())
150 | imageByteStream.close
151 | pixDestroy(pix)
152 | api.End
153 | }
154 |
155 | /** Write the generated data into HBase */
156 | populateHbase( file._1, r.toString( ), file._2 )
157 | }
158 | }
159 |
--------------------------------------------------------------------------------
/ocr/indexer-config.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/ocr/morphlines.conf:
--------------------------------------------------------------------------------
1 | SOLR_LOCATOR : {
2 | # Name of solr collection
3 | collection : mdds_collection
4 |
5 | # ZooKeeper ensemble
6 | zkHost : "$ZK_HOST"
7 | }
8 |
9 | morphlines : [
10 | {
11 | id : morphline
12 | importCommands : ["org.kitesdk.**", "com.ngdata.**"]
13 |
14 | commands : [
15 | {
16 | extractHBaseCells {
17 | mappings : [
18 | {
19 | inputColumn : "info:submitter_info"
20 | outputField : "submitter_info"
21 | type : string
22 | source : value
23 | }
24 | {
25 | inputColumn : "info:case_num"
26 | outputField : "case_num"
27 | type : string
28 | source : value
29 | }
30 | {
31 | inputColumn : "info:device_name"
32 | outputField : "device_name"
33 | type : string
34 | source : value
35 | }
36 | {
37 | inputColumn : "info:reg_num"
38 | outputField : "reg_num"
39 | type : string
40 | source : value
41 | }
42 | {
43 | inputColumn : "info:reg_name"
44 | outputField : "reg_name"
45 | type : string
46 | source : value
47 | }
48 | {
49 | inputColumn : "info:reg_class"
50 | outputField : "reg_class"
51 | type : string
52 | source : value
53 | }
54 | {
55 | inputColumn : "info:product_code"
56 | outputField : "product_code"
57 | type : string
58 | source : value
59 | }
60 | {
61 | inputColumn : "info:dated"
62 | outputField : "dated"
63 | type : string
64 | source : value
65 | }
66 | {
67 | inputColumn : "info:received"
68 | outputField : "received"
69 | type : string
70 | source : value
71 | }
72 | {
73 | inputColumn : "info:approved"
74 | outputField : "approved"
75 | type : string
76 | source : value
77 | }
78 | {
79 | inputColumn : "info:text"
80 | outputField : "text"
81 | type : string
82 | source : value
83 | }
84 | ]
85 | }
86 | }
87 |
88 | {
89 | convertTimestamp {
90 | field : dated
91 | inputFormats : ["MMMM d, yyyy", "yyyy-MM-dd","MMMM d,yyyy"]
92 | inputTimezone : America/Los_Angeles
93 | outputFormat : "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"
94 | outputTimezone : UTC
95 | }
96 | }
97 | {
98 | convertTimestamp {
99 | field : received
100 | inputFormats : ["MMMM d, yyyy", "yyyy-MM-dd","MMMM d,yyyy"]
101 | inputTimezone : America/Los_Angeles
102 | outputFormat : "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"
103 | outputTimezone : UTC
104 | }
105 | }
106 | # Consume the output record of the previous command and pipe another
107 | # record downstream.
108 | #
109 | # This command sanitizes record fields that are unknown to Solr schema.xml
110 | # by deleting them. Recall that Solr throws an exception on any attempt to
111 | # load a document that contains a field that isn't specified in schema.xml
112 | {
113 | sanitizeUnknownSolrFields {
114 | # Location from which to fetch Solr schema
115 | solrLocator : ${SOLR_LOCATOR}
116 | }
117 | }
118 | # { logDebug { format : "output record: {}", args : ["@{}"] } }
119 | ]
120 | }
121 | ]
122 |
--------------------------------------------------------------------------------