├── README.md ├── resources └── imgs │ ├── resources.png │ ├── inphasepipeline.png │ ├── threephasepipeline.png │ └── executionpathinphasepipeline.png ├── src └── main │ ├── resources │ ├── tutorial │ │ ├── ex1 │ │ │ └── RoomNumberAnnotator.yaml │ │ ├── ex3 │ │ │ ├── TutorialDateTime.yaml │ │ │ ├── SimpleTutorialDateTime.yaml │ │ │ └── RoomNumberAnnotator.yaml │ │ ├── ex4 │ │ │ └── MeetingAnnotator.yaml │ │ └── ex2 │ │ │ ├── RemoteRoomNumberAnnotator.yaml │ │ │ └── RoomNumberAnnotator.yaml │ ├── META-INF │ │ └── org.uimafit │ │ │ └── types.txt │ ├── cas_consumer │ │ ├── AnnotationPrinter.yaml │ │ └── XmiWriterCasConsumer.yaml │ ├── collection_reader │ │ ├── fs-collection-reader.yaml │ │ └── filesystem-collection-reader.yaml │ ├── oaqa-tutorial-ex1.yaml │ ├── oaqa-tutorial-ex2.yaml │ ├── oaqa-tutorial-ex2-remote.yaml │ ├── oaqa-tutorial-ex3.yaml │ ├── roomNumber-dateTime-simple.yaml │ ├── roomNumber-dateTime-parallel.yaml │ ├── data │ │ ├── UIMA_Seminars.txt │ │ └── WatsonConferenceRooms.txt │ └── types │ │ ├── SourceDocumentInformation.xml │ │ └── TutorialTypeSystem.xml │ └── java │ ├── test │ └── RegExCompiler.java │ ├── org │ └── apache │ │ └── uima │ │ ├── tutorial │ │ ├── ex6 │ │ │ ├── StringMapResource.java │ │ │ ├── Apache_UIMA.txt │ │ │ ├── StringMapResource_impl.java │ │ │ ├── UimaAcronymAnnotator.java │ │ │ └── UimaMeetingAnnotator.java │ │ ├── DateAnnot.java │ │ ├── TimeAnnot.java │ │ ├── WordAnnot.java │ │ ├── ex1 │ │ │ └── RoomNumberAnnotator.java │ │ ├── SentenceAnnot.java │ │ ├── DateAnnot_Type.java │ │ ├── TimeAnnot_Type.java │ │ ├── UimaMeeting_Type.java │ │ ├── UimaMeeting.java │ │ ├── WordAnnot_Type.java │ │ ├── SentenceAnnot_Type.java │ │ ├── ex2 │ │ │ └── RoomNumberAnnotator.java │ │ ├── ex3 │ │ │ └── RoomNumberAnnotator.java │ │ ├── ex5 │ │ │ └── RoomNumberAnnotator.java │ │ ├── RoomNumber.java │ │ ├── DateTimeAnnot.java │ │ ├── UimaAcronym.java │ │ ├── RoomNumber_Type.java │ │ ├── UimaAcronym_Type.java │ │ ├── DateTimeAnnot_Type.java │ │ └── 
ex4 │ │ │ └── MeetingAnnotator.java │ │ └── examples │ │ ├── package.html │ │ ├── cpe │ │ └── package.html │ │ ├── cas │ │ └── package.html │ │ ├── tokenizer │ │ ├── Token.java │ │ ├── Sentence.java │ │ ├── Token_Type.java │ │ ├── Sentence_Type.java │ │ └── SimpleTokenAndSentenceAnnotator.java │ │ ├── casMultiplier │ │ ├── CasMultiplierExampleApplication.java │ │ └── SimpleTextSegmenter.java │ │ ├── SofaExampleAnnotator.java │ │ ├── SofaExampleApplication.java │ │ ├── xmi │ │ └── XmiCollectionReader.java │ │ └── ExampleApplication.java │ ├── collection │ └── fs │ │ └── FileCollectionReader.java │ └── example │ ├── PersonTitle.java │ └── PersonTitle_Type.java ├── target └── classes │ ├── tutorial │ ├── ex1 │ │ └── RoomNumberAnnotator.yaml │ ├── ex3 │ │ ├── TutorialDateTime.yaml │ │ ├── SimpleTutorialDateTime.yaml │ │ ├── RoomNumberAndDateTime.yaml │ │ └── RoomNumberAnnotator.yaml │ ├── ex4 │ │ └── MeetingAnnotator.yaml │ └── ex2 │ │ ├── RemoteRoomNumberAnnotator.yaml │ │ └── RoomNumberAnnotator.yaml │ ├── cas_consumer │ ├── AnnotationPrinter.yaml │ └── XmiWriterCasConsumer.yaml │ ├── collection_reader │ ├── fs-collection-reader.yaml │ └── filesystem-collection-reader.yaml │ ├── oaqa-tutorial-ex2.yaml │ ├── oaqa-tutorial-ex1.yaml │ ├── oaqa-tutorial-ex2-remote.yaml │ ├── oaqa-tutorial-ex3.yaml │ ├── roomNumber-dateTime-simple.yaml │ ├── roomNumber-dateTime-parallel.yaml │ └── types │ ├── SourceDocumentInformation.xml │ └── TutorialTypeSystem.xml ├── .project ├── .classpath ├── launches ├── ex1.launch ├── ex2.launch ├── ex3.launch └── ex2-remote.launch ├── data ├── TrainableInformationExtractionSystems.txt ├── SeminarChallengesInSpeechRecognition.txt ├── xml │ ├── UIMA_Seminars.xml │ ├── SeminarChallengesInSpeechRecognition.xml │ ├── WatsonConferenceRooms.xml │ ├── TrainableInformationExtractionSystems.xml │ ├── IBM_LifeSciences.xml │ ├── UIMASummerSchool2003.xml │ └── New_IBM_Fellows.xml ├── IBM_LifeSciences.txt ├── UIMASummerSchool2003.txt └── New_IBM_Fellows.txt 
└── pom.xml /README.md: -------------------------------------------------------------------------------- 1 | oaqa-tutorial 2 | ============= 3 | 4 | A group of examples based on the CSE pipeline. -------------------------------------------------------------------------------- /resources/imgs/resources.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oaqa/oaqa-tutorial/HEAD/resources/imgs/resources.png -------------------------------------------------------------------------------- /resources/imgs/inphasepipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oaqa/oaqa-tutorial/HEAD/resources/imgs/inphasepipeline.png -------------------------------------------------------------------------------- /resources/imgs/threephasepipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oaqa/oaqa-tutorial/HEAD/resources/imgs/threephasepipeline.png -------------------------------------------------------------------------------- /src/main/resources/tutorial/ex1/RoomNumberAnnotator.yaml: -------------------------------------------------------------------------------- 1 | class: org.apache.uima.tutorial.ex1.RoomNumberAnnotator 2 | test: param1 -------------------------------------------------------------------------------- /target/classes/tutorial/ex1/RoomNumberAnnotator.yaml: -------------------------------------------------------------------------------- 1 | class: org.apache.uima.tutorial.ex1.RoomNumberAnnotator 2 | test: param1 -------------------------------------------------------------------------------- /resources/imgs/executionpathinphasepipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oaqa/oaqa-tutorial/HEAD/resources/imgs/executionpathinphasepipeline.png 
-------------------------------------------------------------------------------- /src/main/resources/META-INF/org.uimafit/types.txt: -------------------------------------------------------------------------------- 1 | classpath*:types/TutorialTypeSystem.xml 2 | classpath*:types/SourceDocumentInformation.xml 3 | -------------------------------------------------------------------------------- /target/classes/tutorial/ex3/TutorialDateTime.yaml: -------------------------------------------------------------------------------- 1 | class: org.apache.uima.tutorial.ex3.TutorialDateTime 2 | 3 | persistence-provider: | 4 | inherit: ecd.default-log-persistence-provider -------------------------------------------------------------------------------- /src/main/resources/tutorial/ex3/TutorialDateTime.yaml: -------------------------------------------------------------------------------- 1 | class: org.apache.uima.tutorial.ex3.TutorialDateTime 2 | 3 | persistence-provider: | 4 | inherit: ecd.default-log-persistence-provider -------------------------------------------------------------------------------- /target/classes/tutorial/ex3/SimpleTutorialDateTime.yaml: -------------------------------------------------------------------------------- 1 | class: org.apache.uima.tutorial.ex3.SimpleTutorialDateTime 2 | 3 | persistence-provider: | 4 | inherit: ecd.default-log-persistence-provider -------------------------------------------------------------------------------- /target/classes/tutorial/ex4/MeetingAnnotator.yaml: -------------------------------------------------------------------------------- 1 | class: org.apache.uima.tutorial.ex4.MeetingAnnotator 2 | WindowSize: 200 3 | persistence-provider: | 4 | inherit: ecd.default-log-persistence-provider -------------------------------------------------------------------------------- /src/main/resources/tutorial/ex3/SimpleTutorialDateTime.yaml: -------------------------------------------------------------------------------- 1 | class: 
org.apache.uima.tutorial.ex3.SimpleTutorialDateTime 2 | 3 | persistence-provider: | 4 | inherit: ecd.default-log-persistence-provider -------------------------------------------------------------------------------- /src/main/resources/tutorial/ex4/MeetingAnnotator.yaml: -------------------------------------------------------------------------------- 1 | class: org.apache.uima.tutorial.ex4.MeetingAnnotator 2 | WindowSize: 200 3 | persistence-provider: | 4 | inherit: ecd.default-log-persistence-provider -------------------------------------------------------------------------------- /target/classes/cas_consumer/AnnotationPrinter.yaml: -------------------------------------------------------------------------------- 1 | class: org.apache.uima.examples.cpe.AnnotationPrinter 2 | 3 | outputFile: output.txt 4 | 5 | persistence-provider: | 6 | inherit: ecd.default-log-persistence-provider -------------------------------------------------------------------------------- /src/main/resources/cas_consumer/AnnotationPrinter.yaml: -------------------------------------------------------------------------------- 1 | class: org.apache.uima.examples.cpe.AnnotationPrinter 2 | 3 | outputFile: output.txt 4 | 5 | persistence-provider: | 6 | inherit: ecd.default-log-persistence-provider -------------------------------------------------------------------------------- /target/classes/cas_consumer/XmiWriterCasConsumer.yaml: -------------------------------------------------------------------------------- 1 | class: org.apache.uima.examples.xmi.XmiWriterCasConsumer 2 | 3 | OutputDirectory: cas-output 4 | 5 | persistence-provider: | 6 | inherit: ecd.default-log-persistence-provider -------------------------------------------------------------------------------- /src/main/resources/cas_consumer/XmiWriterCasConsumer.yaml: -------------------------------------------------------------------------------- 1 | class: org.apache.uima.examples.xmi.XmiWriterCasConsumer 2 | 3 | OutputDirectory: cas-output 
4 | 5 | persistence-provider: | 6 | inherit: ecd.default-log-persistence-provider -------------------------------------------------------------------------------- /target/classes/collection_reader/fs-collection-reader.yaml: -------------------------------------------------------------------------------- 1 | class: collection.fs.FileCollectionReader 2 | 3 | #decorators: | 4 | # - inherit: internal.collection.retrieval-gs-decorator 5 | 6 | persistence-provider: | 7 | inherit: ecd.default-experiment-persistence-provider -------------------------------------------------------------------------------- /src/main/resources/collection_reader/fs-collection-reader.yaml: -------------------------------------------------------------------------------- 1 | class: collection.fs.FileCollectionReader 2 | 3 | #decorators: | 4 | # - inherit: internal.collection.retrieval-gs-decorator 5 | 6 | persistence-provider: | 7 | inherit: ecd.default-experiment-persistence-provider -------------------------------------------------------------------------------- /target/classes/collection_reader/filesystem-collection-reader.yaml: -------------------------------------------------------------------------------- 1 | class: collection.fs.FileSystemCollectionReader 2 | 3 | Language: en 4 | BrowseSubdirectories: FALSE 5 | Encoding: UTF-8 6 | 7 | 8 | #persistence-provider: | 9 | # inherit: ecd.default-experiment-persistence-provider -------------------------------------------------------------------------------- /target/classes/tutorial/ex2/RemoteRoomNumberAnnotator.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.ecd.phase.adapter.JMSAdapterWrapper 2 | #brokerUrl: tcp://peace.isri.cs.cmu.edu:61616 3 | brokerUrl: tcp://pkdrm:61616 4 | endpoint: RoomNumberAnnotatorQueue 5 | timeout: 5000 6 | getmetatimeout: 5000 7 | cpctimeout: 5000 -------------------------------------------------------------------------------- 
/src/main/resources/collection_reader/filesystem-collection-reader.yaml: -------------------------------------------------------------------------------- 1 | class: collection.fs.FileSystemCollectionReader 2 | 3 | Language: en 4 | BrowseSubdirectories: FALSE 5 | Encoding: UTF-8 6 | 7 | 8 | persistence-provider: | 9 | inherit: ecd.default-experiment-persistence-provider 10 | -------------------------------------------------------------------------------- /src/main/resources/tutorial/ex2/RemoteRoomNumberAnnotator.yaml: -------------------------------------------------------------------------------- 1 | class: edu.cmu.lti.oaqa.ecd.phase.adapter.JMSAdapterWrapper 2 | #brokerUrl: tcp://peace.isri.cs.cmu.edu:61616 3 | brokerUrl: tcp://pkdrm:61616 4 | endpoint: RoomNumberAnnotatorQueue 5 | timeout: 5000 6 | getmetatimeout: 5000 7 | cpctimeout: 5000 -------------------------------------------------------------------------------- /target/classes/tutorial/ex2/RoomNumberAnnotator.yaml: -------------------------------------------------------------------------------- 1 | class: org.apache.uima.tutorial.ex2.RoomNumberAnnotator 2 | 3 | Locations: [Watson - Yorktown, Watson - Hawthrone I, Watson - Hawthorne II] 4 | Patterns: ["\\b[0-4]\\d[0-2]\\d\\d\\b"] 5 | persistence-provider: |1 6 | inherit: ecd.default-log-persistence-provider -------------------------------------------------------------------------------- /src/main/resources/tutorial/ex2/RoomNumberAnnotator.yaml: -------------------------------------------------------------------------------- 1 | class: org.apache.uima.tutorial.ex2.RoomNumberAnnotator 2 | 3 | Locations: [Watson - Yorktown, Watson - Hawthrone I, Watson - Hawthorne II] 4 | Patterns: ["\\b[0-4]\\d[0-2]\\d\\d\\b"] 5 | persistence-provider: |1 6 | inherit: ecd.default-log-persistence-provider -------------------------------------------------------------------------------- /target/classes/tutorial/ex3/RoomNumberAndDateTime.yaml: 
-------------------------------------------------------------------------------- 1 | pipeline: 2 | - inherit: ecd.phase 3 | name: RoomNumberAnnotator 4 | options: | 5 | - inherit: tutorial.ex3.RoomNumberAnnotator 6 | - inherit: ecd.phase 7 | name: TutorialDateTime 8 | options: | 9 | - inherit: tutorial.ex3.TutorialDateTime -------------------------------------------------------------------------------- /target/classes/tutorial/ex3/RoomNumberAnnotator.yaml: -------------------------------------------------------------------------------- 1 | class: org.apache.uima.tutorial.ex3.RoomNumberAnnotator 2 | 3 | Locations: [Watson - Yorktown, Watson - Hawthrone I, Watson - Hawthorne II] 4 | Patterns: ["\\b[0-4]\\d[0-2]\\d\\d\\b","\\b[G1-4][NS]-[A-Z]\\d\\d\\b","\\bJ[12]-[A-Z]\\d\\d\\b"] 5 | persistence-provider: | 6 | inherit: ecd.default-log-persistence-provider -------------------------------------------------------------------------------- /src/main/resources/tutorial/ex3/RoomNumberAnnotator.yaml: -------------------------------------------------------------------------------- 1 | class: org.apache.uima.tutorial.ex3.RoomNumberAnnotator 2 | 3 | Locations: [Watson - Yorktown, Watson - Hawthrone I, Watson - Hawthorne II] 4 | Patterns: ["\\b[0-4]\\d[0-2]\\d\\d\\b","\\b[G1-4][NS]-[A-Z]\\d\\d\\b","\\bJ[12]-[A-Z]\\d\\d\\b"] 5 | persistence-provider: | 6 | inherit: ecd.default-log-persistence-provider -------------------------------------------------------------------------------- /src/main/java/test/RegExCompiler.java: -------------------------------------------------------------------------------- 1 | package test; 2 | 3 | import java.util.regex.Pattern; 4 | 5 | public class RegExCompiler { 6 | 7 | private Pattern pattern; 8 | 9 | public RegExCompiler(String pattern) { 10 | //this.pattern = new Pattern(pattern); 11 | } 12 | 13 | 14 | public void compile(String input){ 15 | 16 | 17 | } 18 | 19 | 20 | 21 | 22 | } 23 | 
-------------------------------------------------------------------------------- /target/classes/oaqa-tutorial-ex2.yaml: -------------------------------------------------------------------------------- 1 | configuration: 2 | name: oaqa-tutorial 3 | author: oaqa 4 | 5 | 6 | collection-reader: 7 | inherit: collection_reader.fs-collection-reader 8 | file: /data/WatsonConferenceRooms.txt 9 | pipeline: 10 | - inherit: ecd.phase 11 | name: RoomNumberAnnotator 12 | options: | 13 | - inherit: tutorial.ex2.RoomNumberAnnotator 14 | - inherit: cas_consumer.XmiWriterCasConsumer -------------------------------------------------------------------------------- /src/main/resources/oaqa-tutorial-ex1.yaml: -------------------------------------------------------------------------------- 1 | configuration: 2 | name: oaqa-tutorial 3 | author: oaqa 4 | collection-reader: 5 | inherit: collection_reader.filesystem-collection-reader 6 | InputDirectory: data/ 7 | 8 | pipeline: 9 | - inherit: ecd.phase 10 | name: RemoteRoomNumberAnnotator 11 | options: | 12 | - inherit: tutorial.ex1.RoomNumberAnnotator 13 | 14 | - inherit: cas_consumer.XmiWriterCasConsumer -------------------------------------------------------------------------------- /src/main/resources/oaqa-tutorial-ex2.yaml: -------------------------------------------------------------------------------- 1 | configuration: 2 | name: oaqa-tutorial 3 | author: oaqa 4 | 5 | 6 | collection-reader: 7 | inherit: collection_reader.fs-collection-reader 8 | file: /data/WatsonConferenceRooms.txt 9 | pipeline: 10 | - inherit: ecd.phase 11 | name: RoomNumberAnnotator 12 | options: | 13 | - inherit: tutorial.ex2.RoomNumberAnnotator 14 | - inherit: cas_consumer.XmiWriterCasConsumer -------------------------------------------------------------------------------- /target/classes/oaqa-tutorial-ex1.yaml: -------------------------------------------------------------------------------- 1 | configuration: 2 | name: oaqa-tutorial 3 | author: oaqa 4 | 
collection-reader: 5 | inherit: collection_reader.filesystem-collection-reader 6 | InputDirectory: data/ 7 | 8 | pipeline: 9 | - inherit: ecd.phase 10 | name: RemoteRoomNumberAnnotator 11 | options: | 12 | - inherit: tutorial.ex1.RoomNumberAnnotator 13 | 14 | - inherit: cas_consumer.XmiWriterCasConsumer -------------------------------------------------------------------------------- /src/main/resources/oaqa-tutorial-ex2-remote.yaml: -------------------------------------------------------------------------------- 1 | configuration: 2 | name: oaqa-tutorial 3 | author: oaqa 4 | collection-reader: 5 | inherit: collection_reader.filesystem-collection-reader 6 | InputDirectory: data/ 7 | 8 | pipeline: 9 | - inherit: ecd.phase 10 | name: RemoteRoomNumberAnnotator 11 | options: | 12 | - inherit: tutorial.ex2.RemoteRoomNumberAnnotator 13 | 14 | - inherit: cas_consumer.AnnotationPrinter -------------------------------------------------------------------------------- /target/classes/oaqa-tutorial-ex2-remote.yaml: -------------------------------------------------------------------------------- 1 | configuration: 2 | name: oaqa-tutorial 3 | author: oaqa 4 | collection-reader: 5 | inherit: collection_reader.filesystem-collection-reader 6 | InputDirectory: data/ 7 | 8 | pipeline: 9 | - inherit: ecd.phase 10 | name: RemoteRoomNumberAnnotator 11 | options: | 12 | - inherit: tutorial.ex2.RemoteRoomNumberAnnotator 13 | 14 | - inherit: cas_consumer.AnnotationPrinter -------------------------------------------------------------------------------- /src/main/resources/oaqa-tutorial-ex3.yaml: -------------------------------------------------------------------------------- 1 | configuration: 2 | name: oaqa-tutorial 3 | author: oaqa 4 | 5 | 6 | collection-reader: 7 | inherit: collection_reader.filesystem-collection-reader 8 | InputDirectory: data/ 9 | pipeline: 10 | - inherit: ecd.phase 11 | name: RoomNumberAnnotator 12 | options: | 13 | - inherit: tutorial.ex3.RoomNumberAnnotator 14 | - 
inherit: ecd.phase 15 | name: TutorialDateTime 16 | options: | 17 | - inherit: tutorial.ex3.TutorialDateTime 18 | - inherit: cas_consumer.XmiWriterCasConsumer -------------------------------------------------------------------------------- /target/classes/oaqa-tutorial-ex3.yaml: -------------------------------------------------------------------------------- 1 | configuration: 2 | name: oaqa-tutorial 3 | author: oaqa 4 | 5 | 6 | collection-reader: 7 | inherit: collection_reader.filesystem-collection-reader 8 | InputDirectory: data/ 9 | pipeline: 10 | - inherit: ecd.phase 11 | name: RoomNumberAnnotator 12 | options: | 13 | - inherit: tutorial.ex3.RoomNumberAnnotator 14 | - inherit: ecd.phase 15 | name: TutorialDateTime 16 | options: | 17 | - inherit: tutorial.ex3.TutorialDateTime 18 | - inherit: cas_consumer.XmiWriterCasConsumer -------------------------------------------------------------------------------- /target/classes/roomNumber-dateTime-simple.yaml: -------------------------------------------------------------------------------- 1 | configuration: 2 | name: oaqa-tutorial 3 | author: oaqa 4 | 5 | 6 | collection-reader: 7 | inherit: collection_reader.filesystem-collection-reader 8 | InputDirectory: data/ 9 | pipeline: 10 | - inherit: ecd.phase 11 | name: RoomNumberAnnotator 12 | options: | 13 | - inherit: tutorial.ex3.RoomNumberAnnotator 14 | - inherit: ecd.phase 15 | name: TutorialDateTime 16 | options: | 17 | - inherit: tutorial.ex3.SimpleTutorialDateTime 18 | - inherit: cas_consumer.AnnotationPrinter -------------------------------------------------------------------------------- /src/main/resources/roomNumber-dateTime-simple.yaml: -------------------------------------------------------------------------------- 1 | configuration: 2 | name: oaqa-tutorial 3 | author: oaqa 4 | 5 | 6 | collection-reader: 7 | inherit: collection_reader.filesystem-collection-reader 8 | InputDirectory: data/ 9 | pipeline: 10 | - inherit: ecd.phase 11 | name: RoomNumberAnnotator 12 
| options: | 13 | - inherit: tutorial.ex3.RoomNumberAnnotator 14 | - inherit: ecd.phase 15 | name: TutorialDateTime 16 | options: | 17 | - inherit: tutorial.ex3.SimpleTutorialDateTime 18 | - inherit: cas_consumer.AnnotationPrinter -------------------------------------------------------------------------------- /target/classes/roomNumber-dateTime-parallel.yaml: -------------------------------------------------------------------------------- 1 | configuration: 2 | name: oaqa-tutorial 3 | author: oaqa 4 | 5 | 6 | collection-reader: 7 | inherit: collection_reader.filesystem-collection-reader 8 | InputDirectory: data/ 9 | pipeline: 10 | - inherit: ecd.phase 11 | name: RoomNumberAnnotator 12 | options: | 13 | - inherit: tutorial.ex3.RoomNumberAnnotator 14 | - inherit: ecd.phase 15 | name: TutorialDateTime 16 | options: | 17 | - inherit: tutorial.ex3.SimpleTutorialDateTime 18 | - inherit: tutorial.ex3.TutorialDateTime 19 | - inherit: cas_consumer.XmiWriterCasConsumer -------------------------------------------------------------------------------- /src/main/resources/roomNumber-dateTime-parallel.yaml: -------------------------------------------------------------------------------- 1 | configuration: 2 | name: oaqa-tutorial 3 | author: oaqa 4 | 5 | 6 | collection-reader: 7 | inherit: collection_reader.filesystem-collection-reader 8 | InputDirectory: data/ 9 | pipeline: 10 | - inherit: ecd.phase 11 | name: RoomNumberAnnotator 12 | options: | 13 | - inherit: tutorial.ex3.RoomNumberAnnotator 14 | - inherit: ecd.phase 15 | name: TutorialDateTime 16 | options: | 17 | - inherit: tutorial.ex3.SimpleTutorialDateTime 18 | - inherit: tutorial.ex3.TutorialDateTime 19 | - inherit: cas_consumer.XmiWriterCasConsumer -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | oaqa-tutorial 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 
14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.m2e.core.maven2Nature 21 | org.eclipse.jdt.core.javanature 22 | org.apache.etools.ctc.javaprojectnature 23 | 24 | 25 | -------------------------------------------------------------------------------- /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /src/main/resources/data/UIMA_Seminars.txt: -------------------------------------------------------------------------------- 1 | Upcoming UIMA Seminars 2 | 3 | April 7, 2004 Distillery Lunch Seminar 4 | UIMA and its Metadata 5 | 12:00PM-1:00PM in HAW GN-K35. 6 | 7 | Dave Ferrucci will give a UIMA overview and discuss the types of component metadata that UIMA components provide. Jon Lenchner will give a demo of the Text Analysis Engine configurator tool. 8 | 9 | 10 | April 16, 2004 KM & I Department Tea 11 | Title: An Eclipse-based TAE Configurator Tool 12 | 3:00PM-4:30PM in HAW GN-K35 . 13 | 14 | Jon Lenchner will demo an Eclipse plugin for configuring TAE descriptors, which will be available soon for you to use. No more editing XML descriptors by hand! 15 | 16 | 17 | May 11, 2004 UIMA Tutorial 18 | 9:00AM-5:00PM in HAW GN-K35. 19 | 20 | This is a full-day, hands-on tutorial on UIMA, covering the development of Text Analysis Engines and Collection Processing Engines, as well as how to include these components in your own applications. 
21 | -------------------------------------------------------------------------------- /launches/ex1.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /launches/ex2.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /launches/ex3.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /launches/ex2-remote.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/ex6/StringMapResource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. 
See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.uima.tutorial.ex6; 21 | 22 | /** 23 | * 24 | * 25 | */ 26 | public interface StringMapResource { 27 | public String get(String aKey); 28 | 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/examples/package.html: -------------------------------------------------------------------------------- 1 | 21 | 22 | Examples illustrating how to use the UIMA Framework. 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/examples/cpe/package.html: -------------------------------------------------------------------------------- 1 | 21 | 22 | Examples of using the Collection Processing Manager (CPM). 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/examples/cas/package.html: -------------------------------------------------------------------------------- 1 | 21 | 22 | Examples of Annotators that use the CAS Interface directly, not the JCAS. 23 | 24 | 25 | -------------------------------------------------------------------------------- /data/TrainableInformationExtractionSystems.txt: -------------------------------------------------------------------------------- 1 | Adventurous Research Summer Seminar Series - Trainable Information Extraction Systems 2 | 3 | August 19, 2003 02:00 PM - 03:30 PM 4 | David Johnson, Frank Oles, Tong Zhang(IBM Research) 5 | Hawthorne GN-F15 6 | Availability: Open 7 | 8 | The technical objective of the TIES project is to build customizable systems that can identify named entities in text, such as persons, organizations, and locations, as well as identifying relations between those entities. 
The technical approach is to develop new statistical and symbolic machine learning algorithms in service of the technical objective. Also, we are working on combining statistical with symbolic techniques. The first part of this talk, given by David E. Johnson, will provide a general overview of the goals of the TIES project. The second part, given by Tong Zhang, will provide background on applying statistical machine learning to this problem domain. Tong will also describe the particular statistical approach taken, which is termed Robust Risk Minimization (RMM). The final part will be given by Frank J. Oles. Frank will introduce his theory of precedence-inclusion patterns. Precedence-inclusion patterns are mathematical structures possessing multiple interacting strict partial orders that satisfy axioms generalizing the familiar properties of irreflexivity and transitivity. This very general theory provides a radically new approach to symbolic, as opposed to statistical, pattern generalization that can be applied to relational learning in a number of settings, including learning based on text, on images, or on videos. 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /data/SeminarChallengesInSpeechRecognition.txt: -------------------------------------------------------------------------------- 1 | UIT Seminar: Challenges in Speech Recognition 2 | August 8, 2003 10:30 AM - 11:30 AM 3 | Lawrence Rabiner , Associate Director CAIP, Rutgers 4 | University, Professor Univ. of Santa Barbara 5 | Yorktown 20-043 6 | Availability: Open 7 | 8 | Speech recognition has matured to the point where it 9 | is now being widely applied in a range of applications 10 | including desktop dictation, cell phone name dialing, 11 | agent technology, automated operator services, 12 | telematics, call center automation and help desks. 
13 | 14 | Although the technology is often good enough for many 15 | of these applications, there remain key challenges in 16 | virtually every aspect of speech recognition that 17 | prevent the technology from being used ubiquitously in 18 | any environment, for any speaker, and for an even 19 | broader range of applications. This talk will analyze 20 | the ‘Speech Circle’ that enables a person to maintain 21 | a dialog with a machine using speech recognition, 22 | spoken language understanding, dialog management and 23 | spoken language generation, and finally text-to-speech 24 | synthesis, and show where significant progress has 25 | been made, and where there remain critical problems 26 | that need to be addressed and solved. 27 | 28 | The talk will include several audio and video examples 29 | of speech recognition and speech understanding systems 30 | that have been studied in the laboratory to illustrate 31 | the challenges that remain to be solved before speech 32 | recognition is considered a solved problem. 33 | 34 | 35 | -------------------------------------------------------------------------------- /src/main/resources/data/WatsonConferenceRooms.txt: -------------------------------------------------------------------------------- 1 | Conference Rooms at Watson: 2 | Location Capacity Wall Phone Ext. 
3 | 4 | Classroom Style 5 | HAW J2-B34 Seats 12 tieline 863-3130 6 | HAW J2-N07 Seats 24 tieline 863-3210 7 | YKT 20-001 Seats 36 tieline 862-4304 8 | YKT 20-051 Seats 18 tieline 862-4307 9 | 10 | Conference Style 11 | HAW 2N-F28 Seats 20 tieline 863-7583 12 | HAW 4N-B15 Seats 14 tieline 863-7126 13 | HAW 4N-B17 Seats 10 tieline 863-7089 14 | HAW 4S-K21 Seats 16 tieline 863-6386 15 | HAW GN-F14 Seats 12 tieline 863-6770 16 | HAW GN-K30 Seats 12 tieline 863-7335 17 | HAW GN-K36 Seats 10 tieline 863-6098 18 | HAW J1-N14 Seats 24 tieline 863-3629 19 | HAW J2-A16 Seats 12 tieline 863-3240 20 | HAW J2-G27 Seats 15 tieline 863-3150 21 | HAW J2-M24 Seats 8 tieline 863-3160 22 | YKT 03-135 Seats 8 tieline 862-1696 23 | YKT 03-235 Seats 8 tieline 862-4278 24 | YKT 05-135 Seats 8 tieline 862-3477 25 | YKT 05-235 Seats 8 tieline 862-4279 26 | YKT 20-006 Seats 8 tieline 862-4301 27 | YKT 20-059 Seats 20 tieline 862-4308 28 | YKT 35-132 Seats 8 tieline 862-2873 29 | YKT 35-232 Seats 8 tieline 862-2860 30 | YKT 38-023 Seats 8 tieline 862-3299 31 | YKT 39-132 Seats 8 tieline 862-3486 32 | YKT 40-100 Seats 20 tieline 862-4199 33 | YKT 40-200 Seats 20 tieline 862-1379 34 | 35 | Other 36 | HAW GN-K35 Seats 24 tieline 863-6104 37 | 38 | Theater Style 39 | HAW 1S-F40 Seats 30 tieline 863-6396 40 | YKT 20-043 Seats 50 tieline 862-4306 41 | 42 | Video Conference Room 43 | YKT 32-026 Seats 25 tieline 862-3917 44 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/examples/tokenizer/Token.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.uima.examples.tokenizer; 21 | 22 | import org.apache.uima.jcas.JCas; 23 | import org.apache.uima.jcas.JCasRegistry; 24 | import org.apache.uima.jcas.cas.TOP_Type; 25 | import org.apache.uima.jcas.tcas.Annotation; 26 | 27 | public class Token extends Annotation { 28 | 29 | public final static int typeIndexID = JCasRegistry.register(Token.class); 30 | 31 | public final static int type = typeIndexID; 32 | 33 | public int getTypeIndexID() { 34 | return typeIndexID; 35 | } 36 | 37 | // Never called. 
Disable default constructor 38 | protected Token() { 39 | } 40 | 41 | /** Internal - Constructor used by generator */ 42 | public Token(int addr, TOP_Type type) { 43 | super(addr, type); 44 | } 45 | 46 | public Token(JCas jcas) { 47 | super(jcas); 48 | } 49 | 50 | public Token(JCas jcas, int start, int end) { 51 | super(jcas, start, end); 52 | } 53 | /** 54 | * Write your own initialization here 55 | * 56 | @generated modifiable */ 57 | private void readObject() {/*default - does nothing empty block */} 58 | 59 | } 60 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/examples/tokenizer/Sentence.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.uima.examples.tokenizer; 21 | 22 | import org.apache.uima.jcas.JCas; 23 | import org.apache.uima.jcas.JCasRegistry; 24 | import org.apache.uima.jcas.cas.TOP_Type; 25 | import org.apache.uima.jcas.tcas.Annotation; 26 | 27 | public class Sentence extends Annotation { 28 | 29 | public final static int typeIndexID = JCasRegistry.register(Sentence.class); 30 | 31 | public final static int type = typeIndexID; 32 | 33 | public int getTypeIndexID() { 34 | return typeIndexID; 35 | } 36 | 37 | // Never called. Disable default constructor 38 | protected Sentence() { 39 | } 40 | 41 | /** Internal - Constructor used by generator */ 42 | public Sentence(int addr, TOP_Type type) { 43 | super(addr, type); 44 | } 45 | 46 | public Sentence(JCas jcas) { 47 | super(jcas); 48 | } 49 | 50 | public Sentence(JCas jcas, int start, int end) { 51 | super(jcas, start, end); 52 | } 53 | /** 54 | * Write your own initialization here 55 | * 56 | @generated modifiable */ 57 | private void readObject() {/*default - does nothing empty block */} 58 | 59 | } 60 | -------------------------------------------------------------------------------- /data/xml/UIMA_Seminars.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | Upcoming UIMA Seminars 26 | 15 March 2004 27 | 28 | April 7, 2004 Distillery Lunch Seminar 29 | UIMA and its Metadata 30 | 12:00PM-1:00PM in HAW GN-K35. 31 | 32 | Dave Ferrucci will give a UIMA overview and discuss the types of component metadata that UIMA components provide. Jon Lenchner will give a demo of the Text Analysis Engine configurator tool. 33 | 34 | 35 | April 16, 2004 KM & I Department Tea 36 | Title: An Eclipse-based TAE Configurator Tool 37 | 3:00PM-4:30PM in HAW GN-K35 . 38 | 39 | Jon Lenchner will demo an Eclipse plugin for configuring TAE descriptors, which will be available soon for you to use. No more editing XML descriptors by hand! 
40 | 41 | 42 | May 11, 2004 UIMA Tutorial 43 | 9:00AM-5:00PM in HAW GN-K35. 44 | 45 | This is a full-day, hands-on tutorial on UIMA, covering the development of Text Analysis Engines and Collection Processing Engines, as well as how to include these components in your own applications. 46 | 47 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | edu.cmu.lti.oaqa.tutorial 4 | oaqa-tutorial 5 | 1.0.0-SNAPSHOT 6 | 7 | org.sonatype.oss 8 | oss-parent 9 | 7 10 | 11 | Base Question Answering Pipeline 12 | https://github.com/oaqa/oaqa-tutorial 13 | 14 | github.com 15 | https://github.com/oaqa/oaqa-tutorial/issues 16 | 17 | 2012 18 | 19 | 20 | The Apache Software License, Version 2.0 21 | http://www.apache.org/licenses/LICENSE-2.0.txt 22 | repo 23 | 24 | 25 | 26 | git@github.com:oaqa/oaqa-tutorial.git 27 | scm:git:git@github.com:oaqa/oaqa-tutorial.git 28 | scm:git:git@github.com:oaqa/oaqa-tutorial.git 29 | 30 | 31 | 32 | 33 | org.apache.maven.plugins 34 | maven-compiler-plugin 35 | 36 | 1.6 37 | 1.6 38 | 39 | 40 | 41 | 42 | 43 | 44 | edu.cmu.lti.oaqa.cse 45 | cse-framework 46 | 2.3.2 47 | 48 | 49 | org.apache.uima 50 | uimaj-as-activemq 51 | 2.4.0 52 | 53 | 54 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/DateAnnot.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.uima.tutorial; 21 | 22 | import org.apache.uima.jcas.JCas; 23 | import org.apache.uima.jcas.JCasRegistry; 24 | import org.apache.uima.jcas.cas.TOP_Type; 25 | 26 | /** 27 | * Updated by JCasGen Mon Nov 29 15:02:38 EST 2004 XML source: C:/Program 28 | * Files/apache/uima/examples/descriptors/tutorial/ex6/TutorialTypeSystem.xml 29 | * 30 | * @generated 31 | */ 32 | public class DateAnnot extends DateTimeAnnot { 33 | /** 34 | * @generated 35 | * @ordered 36 | */ 37 | public final static int typeIndexID = JCasRegistry.register(DateAnnot.class); 38 | 39 | /** 40 | * @generated 41 | * @ordered 42 | */ 43 | public final static int type = typeIndexID; 44 | 45 | /** @generated */ 46 | public int getTypeIndexID() { 47 | return typeIndexID; 48 | } 49 | 50 | /** 51 | * Never called. 
Disable default constructor 52 | * 53 | * @generated 54 | */ 55 | protected DateAnnot() { 56 | } 57 | 58 | /** 59 | * Internal - constructor used by generator 60 | * 61 | * @generated 62 | */ 63 | public DateAnnot(int addr, TOP_Type type) { 64 | super(addr, type); 65 | readObject(); 66 | } 67 | 68 | /** @generated */ 69 | public DateAnnot(JCas jcas) { 70 | super(jcas); 71 | readObject(); 72 | } 73 | 74 | public DateAnnot(JCas jcas, int begin, int end) { 75 | super(jcas); 76 | setBegin(begin); 77 | setEnd(end); 78 | readObject(); 79 | } 80 | 81 | /** 82 | * Write your own initialization here 83 | * 84 | * @generated modifiable 85 | */ 86 | private void readObject() { 87 | } 88 | 89 | } 90 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/TimeAnnot.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.uima.tutorial; 21 | 22 | import org.apache.uima.jcas.JCas; 23 | import org.apache.uima.jcas.JCasRegistry; 24 | import org.apache.uima.jcas.cas.TOP_Type; 25 | 26 | /** 27 | * Updated by JCasGen Mon Nov 29 15:02:38 EST 2004 XML source: C:/Program 28 | * Files/apache/uima/examples/descriptors/tutorial/ex6/TutorialTypeSystem.xml 29 | * 30 | * @generated 31 | */ 32 | public class TimeAnnot extends DateTimeAnnot { 33 | /** 34 | * @generated 35 | * @ordered 36 | */ 37 | public final static int typeIndexID = JCasRegistry.register(TimeAnnot.class); 38 | 39 | /** 40 | * @generated 41 | * @ordered 42 | */ 43 | public final static int type = typeIndexID; 44 | 45 | /** @generated */ 46 | public int getTypeIndexID() { 47 | return typeIndexID; 48 | } 49 | 50 | /** 51 | * Never called. Disable default constructor 52 | * 53 | * @generated 54 | */ 55 | protected TimeAnnot() { 56 | } 57 | 58 | /** 59 | * Internal - constructor used by generator 60 | * 61 | * @generated 62 | */ 63 | public TimeAnnot(int addr, TOP_Type type) { 64 | super(addr, type); 65 | readObject(); 66 | } 67 | 68 | /** @generated */ 69 | public TimeAnnot(JCas jcas) { 70 | super(jcas); 71 | readObject(); 72 | } 73 | 74 | public TimeAnnot(JCas jcas, int begin, int end) { 75 | super(jcas); 76 | setBegin(begin); 77 | setEnd(end); 78 | readObject(); 79 | } 80 | 81 | /** 82 | * Write your own initialization here 83 | * 84 | * @generated modifiable 85 | */ 86 | private void readObject() { 87 | } 88 | 89 | } 90 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/ex6/Apache_UIMA.txt: -------------------------------------------------------------------------------- 1 | 1|Welcome to Apache UIMA (Unstructured Information Management Architecture), a incubator project of the Apache Software Foundation (ASF). 
2 | 2|Our goal is a thriving community of users and developers of UIMA frameworks, supporting components for analysing unstructured content such as text, audio and video. 3 | 4 | What is UIMA? 5 | 6 | Unstructured Information Management applications are software systems that analyze large volumes of unstructured information in order to discover knowledge that is relevant to an end user. 7 | UIMA is a framework and SDK for developing such applications. An example UIM application might ingest plain text and identify entities, such as persons, places, organizations; or relations, such as works-for or located-at. 8 | UIMA enables such an application to be decomposed into components, for example "language identification" -> "language specific segmentation" -> "sentence boundary detection" -> "entity detection (person/place names etc.)". 9 | Each component must implement interfaces defined by the framework and must provide self-describing metadata via XML descriptor files. The framework manages these components and the data flow between them. Components are written in Java or C++; the data that flows between components is designed for efficient mapping between these languages. 10 | UIMA additionally provides capabilities to wrap components as network services, and can scale to very large volumes by replicating processing pipelines over a cluster of networked nodes. 11 | 12 | Apache UIMA is an Apache-licensed open source implementation of the UIMA specification (that specification is, in turn, being developed concurrently by a technical committee within OASIS , a standards organization). 13 | We invite and encourage you to participate in both the implementation and specification efforts. 14 | 15 | UIMA is a component framework for analysing unstructured content such as text, audio and video. 16 | It comprises an SDK and tooling for composing and running analytic components written in Java and C++, with some support for Perl, Python and TCL. 
17 | 18 | 19 | Apache UIMA mailing lists: 20 | 21 | Users - uima-user@incubator.apache.org 22 | Developers - uima-dev@incubator.apache.org 23 | Commits - uima-commits@incubator.apache.org 24 | 25 | 26 | Apache UIMA project committers: 27 | 28 | Michael Baessler 29 | Edward Epstein 30 | Thilo Goetz 31 | Adam Lally 32 | Marshall Schor 33 | 34 | 35 | Apache UIMA project Mentors: 36 | 37 | Ken Coar (ASF member and Vice President) 38 | Sam Ruby (ASF member) -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/WordAnnot.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.uima.tutorial; 21 | 22 | import org.apache.uima.jcas.JCas; 23 | import org.apache.uima.jcas.JCasRegistry; 24 | import org.apache.uima.jcas.cas.TOP_Type; 25 | import org.apache.uima.jcas.tcas.Annotation; 26 | 27 | /** 28 | * Updated by JCasGen Mon Nov 29 15:02:38 EST 2004 XML source: C:/Program 29 | * Files/apache/uima/examples/descriptors/tutorial/ex6/TutorialTypeSystem.xml 30 | * 31 | * @generated 32 | */ 33 | public class WordAnnot extends Annotation { 34 | /** 35 | * @generated 36 | * @ordered 37 | */ 38 | public final static int typeIndexID = JCasRegistry.register(WordAnnot.class); 39 | 40 | /** 41 | * @generated 42 | * @ordered 43 | */ 44 | public final static int type = typeIndexID; 45 | 46 | /** @generated */ 47 | public int getTypeIndexID() { 48 | return typeIndexID; 49 | } 50 | 51 | /** 52 | * Never called. Disable default constructor 53 | * 54 | * @generated 55 | */ 56 | protected WordAnnot() { 57 | } 58 | 59 | /** 60 | * Internal - constructor used by generator 61 | * 62 | * @generated 63 | */ 64 | public WordAnnot(int addr, TOP_Type type) { 65 | super(addr, type); 66 | readObject(); 67 | } 68 | 69 | /** @generated */ 70 | public WordAnnot(JCas jcas) { 71 | super(jcas); 72 | readObject(); 73 | } 74 | 75 | public WordAnnot(JCas jcas, int begin, int end) { 76 | super(jcas); 77 | setBegin(begin); 78 | setEnd(end); 79 | readObject(); 80 | } 81 | 82 | /** 83 | * Write your own initialization here 84 | * 85 | * @generated modifiable 86 | */ 87 | private void readObject() { 88 | } 89 | 90 | } 91 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/ex1/RoomNumberAnnotator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.uima.tutorial.ex1; 21 | 22 | import java.util.regex.Matcher; 23 | import java.util.regex.Pattern; 24 | 25 | import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; 26 | import org.apache.uima.jcas.JCas; 27 | import org.apache.uima.tutorial.RoomNumber; 28 | 29 | /** 30 | * Example annotator that detects room numbers using Java 1.4 regular expressions. 
31 | */ 32 | public class RoomNumberAnnotator extends JCasAnnotator_ImplBase { 33 | private Pattern mYorktownPattern = Pattern.compile("\\b[0-4]\\d-[0-2]\\d\\d\\b"); 34 | 35 | private Pattern mHawthornePattern = Pattern.compile("\\b[JG1-4][1-2NS]-[A-Z]\\d\\d\\b"); 36 | 37 | /** 38 | * @see JCasAnnotator_ImplBase#process(JCas) 39 | */ 40 | public void process(JCas aJCas) { 41 | // get document text 42 | String docText = aJCas.getDocumentText(); 43 | // search for Yorktown room numbers 44 | Matcher matcher = mYorktownPattern.matcher(docText); 45 | while (matcher.find()) { 46 | // found one - create annotation 47 | RoomNumber annotation = new RoomNumber(aJCas); 48 | annotation.setBegin(matcher.start()); 49 | annotation.setEnd(matcher.end()); 50 | annotation.setBuilding("Yorktown"); 51 | annotation.addToIndexes(); 52 | } 53 | // search for Hawthorne room numbers 54 | matcher = mHawthornePattern.matcher(docText); 55 | while (matcher.find()) { 56 | // found one - create annotation 57 | RoomNumber annotation = new RoomNumber(aJCas); 58 | annotation.setBegin(matcher.start()); 59 | annotation.setEnd(matcher.end()); 60 | annotation.setBuilding("Hawthorne"); 61 | annotation.addToIndexes(); 62 | } 63 | } 64 | 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/SentenceAnnot.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.uima.tutorial; 21 | 22 | import org.apache.uima.jcas.JCas; 23 | import org.apache.uima.jcas.JCasRegistry; 24 | import org.apache.uima.jcas.cas.TOP_Type; 25 | import org.apache.uima.jcas.tcas.Annotation; 26 | 27 | /** 28 | * Updated by JCasGen Mon Nov 29 15:02:37 EST 2004 XML source: C:/Program 29 | * Files/apache/uima/examples/descriptors/tutorial/ex6/TutorialTypeSystem.xml 30 | * 31 | * @generated 32 | */ 33 | public class SentenceAnnot extends Annotation { 34 | /** 35 | * @generated 36 | * @ordered 37 | */ 38 | public final static int typeIndexID = JCasRegistry.register(SentenceAnnot.class); 39 | 40 | /** 41 | * @generated 42 | * @ordered 43 | */ 44 | public final static int type = typeIndexID; 45 | 46 | /** @generated */ 47 | public int getTypeIndexID() { 48 | return typeIndexID; 49 | } 50 | 51 | /** 52 | * Never called. 
Disable default constructor 53 | * 54 | * @generated 55 | */ 56 | protected SentenceAnnot() { 57 | } 58 | 59 | /** 60 | * Internal - constructor used by generator 61 | * 62 | * @generated 63 | */ 64 | public SentenceAnnot(int addr, TOP_Type type) { 65 | super(addr, type); 66 | readObject(); 67 | } 68 | 69 | /** @generated */ 70 | public SentenceAnnot(JCas jcas) { 71 | super(jcas); 72 | readObject(); 73 | } 74 | 75 | public SentenceAnnot(JCas jcas, int begin, int end) { 76 | super(jcas); 77 | setBegin(begin); 78 | setEnd(end); 79 | readObject(); 80 | } 81 | 82 | /** 83 | * Write your own initialization here 84 | * 85 | * @generated modifiable 86 | */ 87 | private void readObject() { 88 | } 89 | 90 | } 91 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/examples/tokenizer/Token_Type.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.uima.examples.tokenizer; 21 | 22 | import org.apache.uima.cas.FeatureStructure; 23 | import org.apache.uima.cas.Type; 24 | import org.apache.uima.cas.impl.CASImpl; 25 | import org.apache.uima.cas.impl.FSGenerator; 26 | import org.apache.uima.cas.impl.TypeImpl; 27 | import org.apache.uima.jcas.JCas; 28 | import org.apache.uima.jcas.JCasRegistry; 29 | import org.apache.uima.jcas.tcas.Annotation_Type; 30 | 31 | public class Token_Type extends Annotation_Type { 32 | protected FSGenerator getFSGenerator() { 33 | return fsGenerator; 34 | } 35 | 36 | private final FSGenerator fsGenerator = new FSGenerator() { 37 | public FeatureStructure createFS(int addr, CASImpl cas) { 38 | if (instanceOf_Type.useExistingInstance) { 39 | // Return eq fs instance if already created 40 | FeatureStructure fs = instanceOf_Type.jcas.getJfsFromCaddr(addr); 41 | if (null == fs) { 42 | fs = new Token(addr, instanceOf_Type); 43 | instanceOf_Type.jcas.putJfsFromCaddr(addr, fs); 44 | return fs; 45 | } 46 | return fs; 47 | } else 48 | return new Token(addr, instanceOf_Type); 49 | } 50 | }; 51 | 52 | public final static int typeIndexID = Token.typeIndexID; 53 | 54 | public final static boolean featOkTst = JCasRegistry.getFeatOkTst("org.apache.uima_examples.tokenizer.Token"); 55 | 56 | // * initialize variables to correspond with Cas Type and Features 57 | public Token_Type(JCas jcas, Type casType) { 58 | super(jcas, casType); 59 | casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl) this.casType, getFSGenerator()); 60 | 61 | } 62 | 63 | protected Token_Type() { // block default new operator 64 | throw new RuntimeException("Internal Error-this constructor should never be called."); 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/examples/tokenizer/Sentence_Type.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to 
the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.uima.examples.tokenizer; 21 | 22 | import org.apache.uima.cas.FeatureStructure; 23 | import org.apache.uima.cas.Type; 24 | import org.apache.uima.cas.impl.CASImpl; 25 | import org.apache.uima.cas.impl.FSGenerator; 26 | import org.apache.uima.cas.impl.TypeImpl; 27 | import org.apache.uima.jcas.JCas; 28 | import org.apache.uima.jcas.JCasRegistry; 29 | import org.apache.uima.jcas.tcas.Annotation_Type; 30 | 31 | public class Sentence_Type extends Annotation_Type { 32 | protected FSGenerator getFSGenerator() { 33 | return fsGenerator; 34 | } 35 | 36 | private final FSGenerator fsGenerator = new FSGenerator() { 37 | public FeatureStructure createFS(int addr, CASImpl cas) { 38 | if (instanceOf_Type.useExistingInstance) { 39 | // Return eq fs instance if already created 40 | FeatureStructure fs = instanceOf_Type.jcas.getJfsFromCaddr(addr); 41 | if (null == fs) { 42 | fs = new Sentence(addr, instanceOf_Type); 43 | instanceOf_Type.jcas.putJfsFromCaddr(addr, fs); 44 | return fs; 45 | } 46 | return fs; 47 | } else 48 | return new Sentence(addr, instanceOf_Type); 49 | } 50 | }; 51 | 52 | public final static int typeIndexID = 
Sentence.typeIndexID; 53 | 54 | public final static boolean featOkTst = JCasRegistry.getFeatOkTst("org.apache.uima_examples.tokenizer.Sentence"); 55 | 56 | // * initialize variables to correspond with Cas Type and Features 57 | public Sentence_Type(JCas jcas, Type casType) { 58 | super(jcas, casType); 59 | casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl) this.casType, getFSGenerator()); 60 | 61 | } 62 | 63 | protected Sentence_Type() { // block default new operator 64 | throw new RuntimeException("Internal Error-this constructor should never be called."); 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/ex6/StringMapResource_impl.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.uima.tutorial.ex6; 21 | 22 | import java.io.BufferedReader; 23 | import java.io.IOException; 24 | import java.io.InputStream; 25 | import java.io.InputStreamReader; 26 | import java.util.HashMap; 27 | import java.util.Map; 28 | 29 | import org.apache.uima.resource.DataResource; 30 | import org.apache.uima.resource.ResourceInitializationException; 31 | import org.apache.uima.resource.SharedResourceObject; 32 | 33 | /** 34 | * 35 | * 36 | */ 37 | public class StringMapResource_impl implements StringMapResource, SharedResourceObject { 38 | private Map mMap = new HashMap(); 39 | 40 | /** 41 | * @see org.apache.uima.resource.SharedResourceObject#load(DataResource) 42 | */ 43 | public void load(DataResource aData) throws ResourceInitializationException { 44 | InputStream inStr = null; 45 | try { 46 | // open input stream to data 47 | inStr = aData.getInputStream(); 48 | // read each line 49 | BufferedReader reader = new BufferedReader(new InputStreamReader(inStr)); 50 | String line; 51 | while ((line = reader.readLine()) != null) { 52 | // the first tab on each line separates key from value. 53 | // Keys cannot contain whitespace. 
54 | int tabPos = line.indexOf('\t'); 55 | String key = line.substring(0, tabPos); 56 | String val = line.substring(tabPos + 1); 57 | mMap.put(key, val); 58 | } 59 | } catch (IOException e) { 60 | throw new ResourceInitializationException(e); 61 | } finally { 62 | if (inStr != null) { 63 | try { 64 | inStr.close(); 65 | } catch (IOException e) { 66 | } 67 | } 68 | } 69 | 70 | } 71 | 72 | /** 73 | * @see StringMapResource#get(String) 74 | */ 75 | public String get(String aKey) { 76 | return (String) mMap.get(aKey); 77 | } 78 | 79 | } 80 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/DateAnnot_Type.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.uima.tutorial; 21 | 22 | import org.apache.uima.cas.FeatureStructure; 23 | import org.apache.uima.cas.Type; 24 | import org.apache.uima.cas.impl.CASImpl; 25 | import org.apache.uima.cas.impl.FSGenerator; 26 | import org.apache.uima.cas.impl.TypeImpl; 27 | import org.apache.uima.jcas.JCas; 28 | import org.apache.uima.jcas.JCasRegistry; 29 | 30 | /** 31 | * Updated by JCasGen Mon Nov 29 15:02:38 EST 2004 32 | * 33 | * @generated 34 | */ 35 | public class DateAnnot_Type extends DateTimeAnnot_Type { 36 | /** @generated */ 37 | protected FSGenerator getFSGenerator() { 38 | return fsGenerator; 39 | } 40 | 41 | /** @generated */ 42 | private final FSGenerator fsGenerator = new FSGenerator() { 43 | public FeatureStructure createFS(int addr, CASImpl cas) { 44 | if (instanceOf_Type.useExistingInstance) { 45 | // Return eq fs instance if already created 46 | FeatureStructure fs = instanceOf_Type.jcas.getJfsFromCaddr(addr); 47 | if (null == fs) { 48 | fs = new DateAnnot(addr, instanceOf_Type); 49 | instanceOf_Type.jcas.putJfsFromCaddr(addr, fs); 50 | return fs; 51 | } 52 | return fs; 53 | } else 54 | return new DateAnnot(addr, instanceOf_Type); 55 | } 56 | }; 57 | 58 | /** @generated */ 59 | public final static int typeIndexID = DateAnnot.typeIndexID; 60 | 61 | /** 62 | * @generated 63 | * @modifiable 64 | */ 65 | public final static boolean featOkTst = JCasRegistry.getFeatOkTst("org.apache.uima.tutorial.DateAnnot"); 66 | 67 | /** 68 | * initialize variables to correspond with Cas Type and Features 69 | * 70 | * @generated 71 | */ 72 | public DateAnnot_Type(JCas jcas, Type casType) { 73 | super(jcas, casType); 74 | casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl) this.casType, getFSGenerator()); 75 | 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/TimeAnnot_Type.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.uima.tutorial; 21 | 22 | import org.apache.uima.cas.FeatureStructure; 23 | import org.apache.uima.cas.Type; 24 | import org.apache.uima.cas.impl.CASImpl; 25 | import org.apache.uima.cas.impl.FSGenerator; 26 | import org.apache.uima.cas.impl.TypeImpl; 27 | import org.apache.uima.jcas.JCas; 28 | import org.apache.uima.jcas.JCasRegistry; 29 | 30 | /** 31 | * Updated by JCasGen Mon Nov 29 15:02:38 EST 2004 32 | * 33 | * @generated 34 | */ 35 | public class TimeAnnot_Type extends DateTimeAnnot_Type { 36 | /** @generated */ 37 | protected FSGenerator getFSGenerator() { 38 | return fsGenerator; 39 | } 40 | 41 | /** @generated */ 42 | private final FSGenerator fsGenerator = new FSGenerator() { 43 | public FeatureStructure createFS(int addr, CASImpl cas) { 44 | if (instanceOf_Type.useExistingInstance) { 45 | // Return eq fs instance if already created 46 | FeatureStructure fs = instanceOf_Type.jcas.getJfsFromCaddr(addr); 47 | if (null == fs) { 48 | fs = new TimeAnnot(addr, instanceOf_Type); 49 | 
instanceOf_Type.jcas.putJfsFromCaddr(addr, fs); 50 | return fs; 51 | } 52 | return fs; 53 | } else 54 | return new TimeAnnot(addr, instanceOf_Type); 55 | } 56 | }; 57 | 58 | /** @generated */ 59 | public final static int typeIndexID = TimeAnnot.typeIndexID; 60 | 61 | /** 62 | * @generated 63 | * @modifiable 64 | */ 65 | public final static boolean featOkTst = JCasRegistry.getFeatOkTst("org.apache.uima.tutorial.TimeAnnot"); 66 | 67 | /** 68 | * initialize variables to correspond with Cas Type and Features 69 | * 70 | * @generated 71 | */ 72 | public TimeAnnot_Type(JCas jcas, Type casType) { 73 | super(jcas, casType); 74 | casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl) this.casType, getFSGenerator()); 75 | 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/UimaMeeting_Type.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.uima.tutorial; 21 | 22 | import org.apache.uima.cas.FeatureStructure; 23 | import org.apache.uima.cas.Type; 24 | import org.apache.uima.cas.impl.CASImpl; 25 | import org.apache.uima.cas.impl.FSGenerator; 26 | import org.apache.uima.cas.impl.TypeImpl; 27 | import org.apache.uima.jcas.JCas; 28 | import org.apache.uima.jcas.JCasRegistry; 29 | 30 | /** 31 | * Updated by JCasGen Mon Nov 29 15:02:38 EST 2004 32 | * 33 | * @generated 34 | */ 35 | public class UimaMeeting_Type extends Meeting_Type { 36 | /** @generated */ 37 | protected FSGenerator getFSGenerator() { 38 | return fsGenerator; 39 | } 40 | 41 | /** @generated */ 42 | private final FSGenerator fsGenerator = new FSGenerator() { 43 | public FeatureStructure createFS(int addr, CASImpl cas) { 44 | if (instanceOf_Type.useExistingInstance) { 45 | // Return eq fs instance if already created 46 | FeatureStructure fs = instanceOf_Type.jcas.getJfsFromCaddr(addr); 47 | if (null == fs) { 48 | fs = new UimaMeeting(addr, instanceOf_Type); 49 | instanceOf_Type.jcas.putJfsFromCaddr(addr, fs); 50 | return fs; 51 | } 52 | return fs; 53 | } else 54 | return new UimaMeeting(addr, instanceOf_Type); 55 | } 56 | }; 57 | 58 | /** @generated */ 59 | public final static int typeIndexID = UimaMeeting.typeIndexID; 60 | 61 | /** 62 | * @generated 63 | * @modifiable 64 | */ 65 | public final static boolean featOkTst = JCasRegistry.getFeatOkTst("org.apache.uima.tutorial.UimaMeeting"); 66 | 67 | /** 68 | * initialize variables to correspond with Cas Type and Features 69 | * 70 | * @generated 71 | */ 72 | public UimaMeeting_Type(JCas jcas, Type casType) { 73 | super(jcas, casType); 74 | casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl) this.casType, getFSGenerator()); 75 | 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/UimaMeeting.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.uima.tutorial; 21 | 22 | import org.apache.uima.jcas.JCas; 23 | import org.apache.uima.jcas.JCasRegistry; 24 | import org.apache.uima.jcas.cas.TOP_Type; 25 | 26 | /** 27 | * Updated by JCasGen Mon Nov 29 15:02:38 EST 2004 XML source: C:/Program 28 | * Files/apache/uima/examples/descriptors/tutorial/ex6/TutorialTypeSystem.xml 29 | * 30 | * @generated 31 | */ 32 | public class UimaMeeting extends Meeting { 33 | /** 34 | * @generated 35 | * @ordered 36 | */ 37 | public final static int typeIndexID = JCasRegistry.register(UimaMeeting.class); 38 | 39 | /** 40 | * @generated 41 | * @ordered 42 | */ 43 | public final static int type = typeIndexID; 44 | 45 | /** @generated */ 46 | public int getTypeIndexID() { 47 | return typeIndexID; 48 | } 49 | 50 | /** 51 | * Never called. 
Disable default constructor 52 | * 53 | * @generated 54 | */ 55 | protected UimaMeeting() { 56 | } 57 | 58 | /** 59 | * Internal - constructor used by generator 60 | * 61 | * @generated 62 | */ 63 | public UimaMeeting(int addr, TOP_Type type) { 64 | super(addr, type); 65 | readObject(); 66 | } 67 | 68 | /** @generated */ 69 | public UimaMeeting(JCas jcas) { 70 | super(jcas); 71 | readObject(); 72 | } 73 | 74 | public UimaMeeting(JCas jcas, int begin, int end) { 75 | super(jcas); 76 | setBegin(begin); 77 | setEnd(end); 78 | readObject(); 79 | } 80 | 81 | /** 82 | * Write your own initialization here 83 | * 84 | * @generated modifiable 85 | */ 86 | private void readObject() { 87 | } 88 | 89 | /** Custom constructor taking all parameters */ 90 | public UimaMeeting(JCas jcas, int start, int end, RoomNumber room, DateAnnot date, 91 | TimeAnnot startTime, TimeAnnot endTime) { 92 | super(jcas, start, end, room, date, startTime, endTime); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/WordAnnot_Type.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. 
See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.uima.tutorial; 21 | 22 | import org.apache.uima.cas.FeatureStructure; 23 | import org.apache.uima.cas.Type; 24 | import org.apache.uima.cas.impl.CASImpl; 25 | import org.apache.uima.cas.impl.FSGenerator; 26 | import org.apache.uima.cas.impl.TypeImpl; 27 | import org.apache.uima.jcas.JCas; 28 | import org.apache.uima.jcas.JCasRegistry; 29 | import org.apache.uima.jcas.tcas.Annotation_Type; 30 | 31 | /** 32 | * Updated by JCasGen Mon Nov 29 15:02:38 EST 2004 33 | * 34 | * @generated 35 | */ 36 | public class WordAnnot_Type extends Annotation_Type { 37 | /** @generated */ 38 | protected FSGenerator getFSGenerator() { 39 | return fsGenerator; 40 | } 41 | 42 | /** @generated */ 43 | private final FSGenerator fsGenerator = new FSGenerator() { 44 | public FeatureStructure createFS(int addr, CASImpl cas) { 45 | if (instanceOf_Type.useExistingInstance) { 46 | // Return eq fs instance if already created 47 | FeatureStructure fs = instanceOf_Type.jcas.getJfsFromCaddr(addr); 48 | if (null == fs) { 49 | fs = new WordAnnot(addr, instanceOf_Type); 50 | instanceOf_Type.jcas.putJfsFromCaddr(addr, fs); 51 | return fs; 52 | } 53 | return fs; 54 | } else 55 | return new WordAnnot(addr, instanceOf_Type); 56 | } 57 | }; 58 | 59 | /** @generated */ 60 | public final static int typeIndexID = WordAnnot.typeIndexID; 61 | 62 | /** 63 | * @generated 64 | * @modifiable 65 | */ 66 | public final static boolean featOkTst = JCasRegistry.getFeatOkTst("org.apache.uima.tutorial.WordAnnot"); 67 | 68 | /** 69 | * initialize variables to correspond with Cas Type and Features 70 | * 71 | * @generated 72 | */ 73 | public WordAnnot_Type(JCas jcas, Type casType) { 74 | super(jcas, casType); 75 | casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl) this.casType, getFSGenerator()); 76 | 77 | } 78 | } 79 | 
-------------------------------------------------------------------------------- /data/xml/SeminarChallengesInSpeechRecognition.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 24 | 25 | 26 | UIT Seminar: Challenges in Speech Recognition 27 | 8 August 2003 28 | 29 | UIT Seminar: Challenges in Speech Recognition 30 | August 8, 2003 10:30 AM - 11:30 AM 31 | Lawrence Rabiner , Associate Director CAIP, Rutgers 32 | University, Professor Univ. of Santa Barbara 33 | Yorktown 20-043 34 | Availability: Open 35 | 36 | Speech recognition has matured to the point where it 37 | is now being widely applied in a range of applications 38 | including desktop dictation, cell phone name dialing, 39 | agent technology, automated operator services, 40 | telematics, call center automation and help desks. 41 | 42 | Although the technology is often good enough for many 43 | of these applications, there remain key challenges in 44 | virtually every aspect of speech recognition that 45 | prevent the technology from being used ubiquitously in 46 | any environment, for any speaker, and for an even 47 | broader range of applications. This talk will analyze 48 | the ‘Speech Circle’ that enables a person to maintain 49 | a dialog with a machine using speech recognition, 50 | spoken language understanding, dialog management and 51 | spoken language generation, and finally text-to-speech 52 | synthesis, and show where significant progress has 53 | been made, and where there remain critical problems 54 | that need to be addressed and solved. 55 | 56 | The talk will include several audio and video examples 57 | of speech recognition and speech understanding systems 58 | that have been studied in the laboratory to illustrate 59 | the challenges that remain to be solved before speech 60 | recognition is considered a solved problem. 
61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /data/xml/WatsonConferenceRooms.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | Conference Rooms at Watson 26 | 01 January 2000 27 | 28 | Conference Rooms at Watson: 29 | Location Capacity Wall Phone Ext. 30 | 31 | Classroom Style 32 | HAW J2-B34 Seats 12 tieline 863-3130 33 | HAW J2-N07 Seats 24 tieline 863-3210 34 | YKT 20-001 Seats 36 tieline 862-4304 35 | YKT 20-051 Seats 18 tieline 862-4307 36 | 37 | Conference Style 38 | HAW 2N-F28 Seats 20 tieline 863-7583 39 | HAW 4N-B15 Seats 14 tieline 863-7126 40 | HAW 4N-B17 Seats 10 tieline 863-7089 41 | HAW 4S-K21 Seats 16 tieline 863-6386 42 | HAW GN-F14 Seats 12 tieline 863-6770 43 | HAW GN-K30 Seats 12 tieline 863-7335 44 | HAW GN-K36 Seats 10 tieline 863-6098 45 | HAW J1-N14 Seats 24 tieline 863-3629 46 | HAW J2-A16 Seats 12 tieline 863-3240 47 | HAW J2-G27 Seats 15 tieline 863-3150 48 | HAW J2-M24 Seats 8 tieline 863-3160 49 | YKT 03-135 Seats 8 tieline 862-1696 50 | YKT 03-235 Seats 8 tieline 862-4278 51 | YKT 05-135 Seats 8 tieline 862-3477 52 | YKT 05-235 Seats 8 tieline 862-4279 53 | YKT 20-006 Seats 8 tieline 862-4301 54 | YKT 20-059 Seats 20 tieline 862-4308 55 | YKT 35-132 Seats 8 tieline 862-2873 56 | YKT 35-232 Seats 8 tieline 862-2860 57 | YKT 38-023 Seats 8 tieline 862-3299 58 | YKT 39-132 Seats 8 tieline 862-3486 59 | YKT 40-100 Seats 20 tieline 862-4199 60 | YKT 40-200 Seats 20 tieline 862-1379 61 | 62 | Other 63 | HAW GN-K35 Seats 24 tieline 863-6104 64 | 65 | Theater Style 66 | HAW 1S-F40 Seats 30 tieline 863-6396 67 | YKT 20-043 Seats 50 tieline 862-4306 68 | 69 | Video Conference Room 70 | YKT 32-026 Seats 25 tieline 862-3917 71 | 72 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/SentenceAnnot_Type.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.uima.tutorial; 21 | 22 | import org.apache.uima.cas.FeatureStructure; 23 | import org.apache.uima.cas.Type; 24 | import org.apache.uima.cas.impl.CASImpl; 25 | import org.apache.uima.cas.impl.FSGenerator; 26 | import org.apache.uima.cas.impl.TypeImpl; 27 | import org.apache.uima.jcas.JCas; 28 | import org.apache.uima.jcas.JCasRegistry; 29 | import org.apache.uima.jcas.tcas.Annotation_Type; 30 | 31 | /** 32 | * Updated by JCasGen Mon Nov 29 15:02:37 EST 2004 33 | * 34 | * @generated 35 | */ 36 | public class SentenceAnnot_Type extends Annotation_Type { 37 | /** @generated */ 38 | protected FSGenerator getFSGenerator() { 39 | return fsGenerator; 40 | } 41 | 42 | /** @generated */ 43 | private final FSGenerator fsGenerator = new FSGenerator() { 44 | public FeatureStructure createFS(int addr, CASImpl cas) { 45 | if (instanceOf_Type.useExistingInstance) { 46 | // Return eq fs instance if already created 47 | FeatureStructure fs = instanceOf_Type.jcas.getJfsFromCaddr(addr); 48 | if (null == fs) { 49 | fs = new 
SentenceAnnot(addr, instanceOf_Type); 50 | instanceOf_Type.jcas.putJfsFromCaddr(addr, fs); 51 | return fs; 52 | } 53 | return fs; 54 | } else 55 | return new SentenceAnnot(addr, instanceOf_Type); 56 | } 57 | }; 58 | 59 | /** @generated */ 60 | public final static int typeIndexID = SentenceAnnot.typeIndexID; 61 | 62 | /** 63 | * @generated 64 | * @modifiable 65 | */ 66 | public final static boolean featOkTst = JCasRegistry.getFeatOkTst("org.apache.uima.tutorial.SentenceAnnot"); 67 | 68 | /** 69 | * initialize variables to correspond with Cas Type and Features 70 | * 71 | * @generated 72 | */ 73 | public SentenceAnnot_Type(JCas jcas, Type casType) { 74 | super(jcas, casType); 75 | casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl) this.casType, getFSGenerator()); 76 | 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /data/xml/TrainableInformationExtractionSystems.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | Adventurous Research Summer Seminar Series - Trainable Information Extraction Systems 26 | 19 August 2003 27 | 28 | Adventurous Research Summer Seminar Series - Trainable Information Extraction Systems 29 | 30 | August 19, 2003 02:00 PM - 03:30 PM 31 | David Johnson, Frank Oles, Tong Zhang(IBM Research) 32 | Hawthorne GN-F15 33 | Availability: Open 34 | 35 | The technical objective of the TIES project is to build customizable systems that can identify named entities in text, such as persons, organizations, and locations, as well as identifying relations between those entities. The technical approach is to develop new statistical and symbolic machine learning algorithms in service of the technical objective. Also, we are working on combining statistical with symbolic techniques. The first part of this talk, given by David E. Johnson, will provide a general overview of the goals of the TIES project. 
The second part, given by Tong Zhang, will provide background on applying statistical machine learning to this problem domain. Tong will also describe the particular statistical approach taken, which is termed Robust Risk Minimization (RMM). The final part will be given by Frank J. Oles. Frank will introduce his theory of precedence-inclusion patterns. Precedence-inclusion patterns are mathematical structures possessing multiple interacting strict partial orders that satisfy axioms generalizing the familiar properties of irreflexivity and transitivity. This very general theory provides a radically new approach to symbolic, as opposed to statistical, pattern generalization that can be applied to relational learning in a number of settings, including learning based on text, on images, or on videos. 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /data/IBM_LifeSciences.txt: -------------------------------------------------------------------------------- 1 | "Life sciences is one of the emerging markets at the heart of IBM's growth strategy," said John M. Thompson, IBM senior vice president & group executive, Software. "This investment is the first of a number of steps we will be taking to advance IBM's life sciences initiatives." In his role as newly appointed IBM Corporation vice chairman, effective September 1, Mr. Thompson will be responsible for integrating and accelerating IBM's efforts to exploit life sciences and other emerging growth areas. 2 | 3 | IBM estimates the market for IT solutions for life sciences will skyrocket from $3.5 billion today to more than $9 billion by 2003. Driving demand is the explosive growth in genomic, proteomic and pharmaceutical research. For example, the Human Genome Database is approximately three terabytes of data, or the equivalent of 150 million pages of information. The volume of life sciences data is doubling every six months. 
4 | 5 | "All of this genetic data is worthless without the information technology that can help scientists manage and analyze it to unlock the pathways that will lead to new cures for many of today's diseases," said Dr. Caroline Kovac, vice president of IBM's new Life Sciences unit. "IBM can help speed this process by enabling more efficient interpretation of data and sharing of knowledge. The potential for change based on innovation in life sciences is bigger than the change caused by the digital circuit." 6 | 7 | Among the life sciences initiatives already underway at IBM are: 8 | - DiscoveryLink* -- For the first time, researchers using this combination of innovative middleware and integration services can join together information from many sources to solve complex medical research problems. DiscoveryLink creates a "virtual database" that permits data to be accessed and extracted from multiple data sources used in research and development projects. This IT solution can dramatically improve product cycle time and lower development costs for pharmaceutical, biotechnology and agri-science companies. 9 | 10 | - Blue Gene* - IBM is building a supercomputer 100 times faster than any available today designed to advance understanding of the mechanisms behind protein folding through large-scale biomolecular simulation. In December, IBM committed $100 million to this five-year research project to advance the state-of-the-art in supercomputing for biological applications. 11 | - Bio-Dictionary* -- IBM has compiled a protein dictionary containing some 30 million protein "words" designed to accelerate the understanding of protein shapes and functions.Bio-Dictionaries for selected genomes, as well as bioinformatics algorithms for pattern discovery and other relevant applications, are available to scientists and researchers for noncommercial use through a website dedicated to life sciences content at http://www.research.ibm.com/compsci/compbio/. 
12 | 13 | * Indicates trademark or registered trademark of IBM Corporation. -------------------------------------------------------------------------------- /data/UIMASummerSchool2003.txt: -------------------------------------------------------------------------------- 1 | UIMA Summer School 2 | 3 | August 26, 2003 4 | UIMA 101 - The New UIMA Introduction 5 | (Hands-on Tutorial) 6 | 9:00AM-5:00PM in HAW GN-K35 7 | 8 | August 28, 2003 9 | FROST Tutorial 10 | 9:00AM-5:00PM in HAW GN-K35 11 | 12 | September 15, 2003 13 | UIMA 201: UIMA Advanced Topics 14 | (Hands-on Tutorial) 15 | 9:00AM-5:00PM in HAW 1S-F53 16 | 17 | September 17, 2003 18 | The UIMA System Integration Test and Hardening Service 19 | The "SITH" 20 | 3:00PM-4:30PM in HAW GN-K35 21 | 22 | 23 | 24 | UIMA Summer School Tutorial and Presentation Details 25 | UIMA 101: The new UIMA tutorial 26 | Tuesday August 26 9:00AM - 4:30PM in GN-K35 27 | 28 | UIMA 101 is a hands-on programming tutorial. 29 | 30 | UIMA 101 is intended for people who want a first introductory course to UIMA or for people who would like a refresher. 31 | 32 | The tutorial covers the same concepts in the first UIMA tutorial given in 3Q 2002 except for some key updates: 33 | 34 | 1) It uses a new interface to the CAS that makes it more natural to access and update CAS feature structures using ordinary Java objects (i.e., the JCAS) and 35 | 2) It uses updated TAE interfaces that give the application developer more control over managing multiple CASs. 36 | 37 | Please NOTE expert users of UIMA can skip this one and should consider attending the Advanced Topics tutorial. 38 | 39 | Prerequisites for the UIMA 101 Tutorial 40 | 1) Java Programming 41 | 2) Some experience with Eclipse IDE helpful 42 | 43 | FROST Tutorial 44 | August 28 9:00AM - 5:00PM in GN-K35 45 | 46 | Visitors from the FROST team will be here to talk to us about FROST. 
47 | 48 | UIMA 201: The UIMA Advanced Topics Tutorial 49 | September 15: 9:00AM - 5:30PM in Hawthorne 1S-F53 50 | 51 | UIMA 201 will introduce some new UIMA concepts and walk the student through hands-on examples. 52 | 53 | The advanced topics tutorial is designed for people who have some experience with UIMA and want 54 | to use new capabilities of UIMA 1.0 to address one or more of the following 55 | Advanced Topics: 56 | 57 | 1) Collection Processing and Collection Processing Engines (CPEs) 58 | 2) Multi-Threading and CAS Pooling 59 | 3) Using the UIMA adapter framework to integrate network TAEs with Java TAEs 60 | 4) A Semantic Search Application that brings it all together 61 | 62 | Prerequisites for UIMA 201 63 | 1) UIMA 101 Tutorial OR Extensive UIMA Experience 64 | 65 | The UIMA Integration Test bed Service (The "SITH") 66 | September 17 3:00PM - 4:30PM in HAW GN-K35 67 | 68 | We have developed the first version of the UIMA Integration Test bed service. 69 | 70 | This service is being developed to help test, evaluate, certify and publish UIMA compliant components. 71 | 72 | In this talk we will explain the service and what it is intended to provide the UIMA community. We will address the following topics: 73 | 74 | 1. SITH Services 75 | 2. How to submit components and what to expect in return 76 | 3. Overview of the test bed implementation using Collection Processing UIMA and Juru. 77 | 4. Next Steps for the SITH 78 | 79 | 80 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/ex6/UimaAcronymAnnotator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.uima.tutorial.ex6; 21 | 22 | import java.util.StringTokenizer; 23 | 24 | import org.apache.uima.UimaContext; 25 | import org.apache.uima.analysis_component.AnalysisComponent; 26 | import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; 27 | import org.apache.uima.jcas.JCas; 28 | import org.apache.uima.resource.ResourceAccessException; 29 | import org.apache.uima.resource.ResourceInitializationException; 30 | import org.apache.uima.tutorial.UimaAcronym; 31 | 32 | /** 33 | * Annotates UIMA acronyms and provides their expanded forms. When combined in an aggregate TAE with 34 | * the UimaMeetingAnnotator, demonstrates the use of the ResourceManager to share data between 35 | * annotators. 
36 | * 37 | * 38 | */ 39 | public class UimaAcronymAnnotator extends JCasAnnotator_ImplBase { 40 | /** Map from acronyms to their expanded forms */ 41 | private StringMapResource mMap; 42 | 43 | /** 44 | * @see AnalysisComponent#initialize(UimaContext) 45 | */ 46 | public void initialize(UimaContext aContext) throws ResourceInitializationException { 47 | super.initialize(aContext); 48 | // get a reference to the String Map Resource 49 | try { 50 | mMap = (StringMapResource) getContext().getResourceObject("AcronymTable"); 51 | } catch (ResourceAccessException e) { 52 | throw new ResourceInitializationException(e); 53 | } 54 | } 55 | 56 | /** 57 | * @see JCasAnnotator_ImplBase#process(JCas) 58 | */ 59 | public void process(JCas aJCas) { 60 | // go through document word-by-word 61 | String text = aJCas.getDocumentText(); 62 | int pos = 0; 63 | StringTokenizer tokenizer = new StringTokenizer(text, " \t\n\r.<.>/?\";:[{]}\\|=+()!", true); 64 | while (tokenizer.hasMoreTokens()) { 65 | String token = tokenizer.nextToken(); 66 | // look up token in map to see if it is an acronym 67 | String expandedForm = mMap.get(token); 68 | if (expandedForm != null) { 69 | // create annotation 70 | UimaAcronym annot = new UimaAcronym(aJCas, pos, pos + token.length(), expandedForm); 71 | annot.addToIndexes(); 72 | } 73 | // incrememnt pos and go to next token 74 | pos += token.length(); 75 | } 76 | } 77 | 78 | } 79 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/ex2/RoomNumberAnnotator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | // 20 | // 21 | package org.apache.uima.tutorial.ex2; 22 | 23 | import java.util.regex.Matcher; 24 | import java.util.regex.Pattern; 25 | import org.apache.uima.UimaContext; 26 | import org.apache.uima.analysis_component.AnalysisComponent; 27 | import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; 28 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 29 | import org.apache.uima.jcas.JCas; 30 | import org.apache.uima.resource.ResourceInitializationException; 31 | import org.apache.uima.tutorial.RoomNumber; 32 | import org.apache.uima.util.Level; 33 | 34 | /** 35 | * Example annotator that detects room numbers using Java 1.4 regular expressions. 36 | */ 37 | public class RoomNumberAnnotator extends JCasAnnotator_ImplBase { 38 | private Pattern[] mPatterns; 39 | 40 | private String[] mLocations; 41 | 42 | /** 43 | * @see AnalysisComponent#initialize(UimaContext) 44 | */ 45 | public void initialize(UimaContext aContext) throws ResourceInitializationException { 46 | super.initialize(aContext); 47 | // Get config. 
parameter values from oaqa-tutorial-ex2.yaml 48 | String[] patternStrings = (String[]) aContext.getConfigParameterValue("Patterns"); 49 | mLocations = (String[]) aContext.getConfigParameterValue("Locations"); 50 | 51 | // compile regular expressions 52 | mPatterns = new Pattern[patternStrings.length]; 53 | for (int i = 0; i < patternStrings.length; i++) { 54 | mPatterns[i] = Pattern.compile(patternStrings[i]); 55 | } 56 | } 57 | 58 | /** 59 | * @see JCasAnnotator_ImplBase#process(JCas) 60 | */ 61 | public void process(JCas aJCas) throws AnalysisEngineProcessException { 62 | // get document text 63 | String docText = aJCas.getDocumentText(); 64 | 65 | // loop over patterns 66 | for (int i = 0; i < mPatterns.length; i++) { 67 | Matcher matcher = mPatterns[i].matcher(docText); 68 | while (matcher.find()) { 69 | // found one - create annotation 70 | RoomNumber annotation = new RoomNumber(aJCas); 71 | annotation.setBegin(matcher.start()); 72 | annotation.setEnd(matcher.end()); 73 | annotation.setBuilding(mLocations[i]); 74 | annotation.addToIndexes(); 75 | getContext().getLogger().log(Level.FINEST, "Found: " + annotation); 76 | } 77 | } 78 | } 79 | 80 | } 81 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/ex3/RoomNumberAnnotator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | // 20 | // 21 | package org.apache.uima.tutorial.ex3; 22 | 23 | import java.util.regex.Matcher; 24 | import java.util.regex.Pattern; 25 | import org.apache.uima.UimaContext; 26 | import org.apache.uima.analysis_component.AnalysisComponent; 27 | import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; 28 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 29 | import org.apache.uima.jcas.JCas; 30 | import org.apache.uima.resource.ResourceInitializationException; 31 | import org.apache.uima.tutorial.RoomNumber; 32 | import org.apache.uima.util.Level; 33 | 34 | /** 35 | * Example annotator that detects room numbers using Java 1.4 regular expressions. 36 | */ 37 | public class RoomNumberAnnotator extends JCasAnnotator_ImplBase { 38 | private Pattern[] mPatterns; 39 | 40 | private String[] mLocations; 41 | 42 | /** 43 | * @see AnalysisComponent#initialize(UimaContext) 44 | */ 45 | public void initialize(UimaContext aContext) throws ResourceInitializationException { 46 | super.initialize(aContext); 47 | // Get config. 
parameter values from oaqa-tutorial-ex2.yaml 48 | String[] patternStrings = (String[]) aContext.getConfigParameterValue("Patterns"); 49 | mLocations = (String[]) aContext.getConfigParameterValue("Locations"); 50 | 51 | // compile regular expressions 52 | mPatterns = new Pattern[patternStrings.length]; 53 | for (int i = 0; i < patternStrings.length; i++) { 54 | mPatterns[i] = Pattern.compile(patternStrings[i]); 55 | } 56 | } 57 | 58 | /** 59 | * @see JCasAnnotator_ImplBase#process(JCas) 60 | */ 61 | public void process(JCas aJCas) throws AnalysisEngineProcessException { 62 | // get document text 63 | String docText = aJCas.getDocumentText(); 64 | 65 | // loop over patterns 66 | for (int i = 0; i < mPatterns.length; i++) { 67 | Matcher matcher = mPatterns[i].matcher(docText); 68 | while (matcher.find()) { 69 | // found one - create annotation 70 | RoomNumber annotation = new RoomNumber(aJCas); 71 | annotation.setBegin(matcher.start()); 72 | annotation.setEnd(matcher.end()); 73 | annotation.setBuilding(mLocations[i]); 74 | annotation.addToIndexes(); 75 | getContext().getLogger().log(Level.FINEST, "Found: " + annotation); 76 | } 77 | } 78 | } 79 | 80 | } 81 | -------------------------------------------------------------------------------- /src/main/java/collection/fs/FileCollectionReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2012 Carnegie Mellon University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package collection.fs; 18 | import java.io.FileInputStream; 19 | import java.io.IOException; 20 | import java.io.InputStream; 21 | import java.io.InputStreamReader; 22 | import java.io.Reader; 23 | import java.util.Arrays; 24 | import java.util.Iterator; 25 | 26 | import mx.bigdata.streaming.RecordBuilder; 27 | import mx.bigdata.streaming.StreamingRecordReader; 28 | 29 | import org.apache.uima.resource.ResourceInitializationException; 30 | 31 | import edu.cmu.lti.oaqa.framework.DataElement; 32 | import edu.cmu.lti.oaqa.framework.collection.IterableCollectionReader; 33 | 34 | public final class FileCollectionReader extends IterableCollectionReader { 35 | 36 | private static final String FILE_PROPERTY = "openqa.collection.filename"; 37 | 38 | private StreamingRecordReader reader; 39 | 40 | @Override 41 | protected Iterator getInputSet() throws ResourceInitializationException { 42 | String filename = System.getProperty(FILE_PROPERTY); 43 | try { 44 | if (filename == null) { 45 | this.reader = buildIteratorFromFile(); 46 | } else { 47 | InputStream in = new FileInputStream(filename); 48 | this.reader = buildIterator(in); 49 | } 50 | } catch (IOException e) { 51 | throw new ResourceInitializationException(e); 52 | } 53 | return reader.iterator(); 54 | } 55 | 56 | private StreamingRecordReader buildIteratorFromFile() throws IOException { 57 | System.err.printf("%s system property not specified, using 'file'" 58 | + " parameter from configuration file\n",FILE_PROPERTY); 59 | String resource = (String) getConfigParameterValue("file"); 60 | if (resource != null) { 61 | System.err.printf("Reading file: %s from the classpath\n", resource); 62 | InputStream in = getClass().getResourceAsStream(resource); 63 | return buildIterator(in); 64 | } else { 65 | throw new IllegalArgumentException(String.format("Parameter 'file' must be specified")); 66 | } 
67 | } 68 | 69 | private StreamingRecordReader buildIterator(InputStream in) throws IOException { 70 | Reader reader = new InputStreamReader(in); 71 | return StreamingRecordReader.newReader(reader, new DataElementBuilder()); 72 | } 73 | 74 | @Override 75 | public void close() throws IOException { 76 | reader.close(); 77 | } 78 | 79 | private final class DataElementBuilder implements RecordBuilder { 80 | @Override 81 | public DataElement build(String line) { 82 | String[] data = line.split("\\|"); 83 | System.out.println(Arrays.toString(data)); 84 | return new DataElement(getDataset(), "0", line, null); 85 | } 86 | } 87 | } -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/ex5/RoomNumberAnnotator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.uima.tutorial.ex5; 21 | 22 | import java.util.regex.Matcher; 23 | import java.util.regex.Pattern; 24 | import java.util.regex.PatternSyntaxException; 25 | 26 | import org.apache.uima.UimaContext; 27 | import org.apache.uima.analysis_component.AnalysisComponent; 28 | import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; 29 | import org.apache.uima.jcas.JCas; 30 | import org.apache.uima.resource.ResourceInitializationException; 31 | import org.apache.uima.tutorial.RoomNumber; 32 | import org.apache.uima.util.Level; 33 | 34 | /** 35 | * Example annotator that detects room numbers using Java 1.4 regular expressions. 36 | */ 37 | public class RoomNumberAnnotator extends JCasAnnotator_ImplBase { 38 | private Pattern[] mPatterns; 39 | 40 | private String[] mLocations; 41 | 42 | public static final String MESSAGE_DIGEST = "org.apache.uima.tutorial.ex5.RoomNumberAnnotator_Messages"; 43 | 44 | /** 45 | * @see AnalysisComponent#initialize(UimaContext) 46 | */ 47 | public void initialize(UimaContext aContext) throws ResourceInitializationException { 48 | super.initialize(aContext); 49 | // Get config. 
parameter values 50 | String[] patternStrings = (String[]) aContext.getConfigParameterValue("Patterns"); 51 | mLocations = (String[]) aContext.getConfigParameterValue("Locations"); 52 | 53 | // compile regular expressions 54 | mPatterns = new Pattern[patternStrings.length]; 55 | for (int i = 0; i < patternStrings.length; i++) { 56 | try { 57 | mPatterns[i] = Pattern.compile(patternStrings[i]); 58 | } catch (PatternSyntaxException e) { 59 | throw new ResourceInitializationException(MESSAGE_DIGEST, "regex_syntax_error", 60 | new Object[] { patternStrings[i] }, e); 61 | } 62 | } 63 | } 64 | 65 | /** 66 | * @see JCasAnnotator_ImplBase#process(JCas) 67 | */ 68 | public void process(JCas aJCas) { 69 | // get document text 70 | String docText = aJCas.getDocumentText(); 71 | 72 | // loop over patterns 73 | for (int i = 0; i < mPatterns.length; i++) { 74 | Matcher matcher = mPatterns[i].matcher(docText); 75 | while (matcher.find()) { 76 | // found one - create annotation 77 | RoomNumber annotation = new RoomNumber(aJCas); 78 | annotation.setBegin(matcher.start()); 79 | annotation.setEnd(matcher.end()); 80 | annotation.addToIndexes(); 81 | annotation.setBuilding(mLocations[i]); 82 | getContext().getLogger().log(Level.FINEST, "Found: " + annotation); 83 | } 84 | } 85 | } 86 | 87 | } 88 | -------------------------------------------------------------------------------- /data/New_IBM_Fellows.txt: -------------------------------------------------------------------------------- 1 | IBM today elevated five employees to the title of IBM Fellow -- its most prestigious technical honor. The company also presented more than $2.8 million in cash awards to employees whose technical innovation have yielded exceptional value to the company and its customers. 2 | 3 | IBM conferred the accolades and awards at its 2003 Corporate Technical Recognition Event (CTRE) in Scottsdale, Ariz. 
CTRE is a 40-year tradition at IBM, established to recognize exceptional technical employees and reward them for extraordinary achievements and contributions to the company's technology leadership. 4 | 5 | "Our technical employees are among the best and brightest innovators in the world. They share a passion for excellence that defines their work and permeates the products and services IBM delivers to its customers," said Nick Donofrio, senior vice president, technology and manufacturing for IBM. "CTRE provides the means for us to honor those who have distinguished themselves as exceptional leaders among their peers." 6 | 7 | Among the special honorees at the 2003 CTRE are five employees who earned the coveted distinction of IBM Fellow: 8 | 9 | 10 | - Grady Booch, chief scientist of Rational Software, IBM Software Group. Recognized internationally for his innovative work on software architecture, modeling, and software engineering process. Mr. Booch is one of the original authors of the Unified Modeling Language (UML), the industry-standard language of blueprints for software-intensive systems. 11 | 12 | - Dr. Donald Chamberlin, researcher, IBM Almaden Research Center. An expert in relational database languages, Dr. Chamberlin is co- inventor of SQL, the language that energized the relational database market. He has also influenced the creation of XQuery, one of a new generation of database query languages covering structured, semi-structured and unstructured data. 13 | 14 | - Dr. George Galambos, chief technology officer, IBM Global Services (IGS) in Canada; the first Fellow from Canada. Dr. Galambos specializes in high-performance, high availability designs, operational effectiveness, and risk assessment/mitigation, focusing on systems engineering and architecture reuse that enhances efficiency and stability. 
He is a principal driver of and contributor to the widely acclaimed "Patterns for e-business" and the Enterprise Solution Structure Reference Architectures, widely used by IGS in customer engagements. 15 | 16 | - Rod Smith, vice president of Internet emerging technologies, IBM Software Group. A leader in the areas of object-oriented programming, visual development tools, Java, XML, and Web Services. Rod also was the chief technical strategist for focusing the Java platform for use in middleware solutions, in particular initiating contributions to the development of the J2EE. 17 | 18 | - Charles Webb, eServer processor design, IBM Systems Group. Charles Webb has led the reinvention of IBM's eServer zSeries microprocessor designs and roadmap, including the z900 server, where he provided the bridge among architecture, hardware, compilers and system software, defining major portions of the 64- bit architecture and beyond. 19 | 20 | 21 | The title of IBM Fellow is the company's most preeminent technical distinction and is granted in recognition of outstanding and sustained technical achievements in engineering, programming, science and technology. Only 175 individuals have earned this designation in the company's history and, including the newly named Fellows, 56 are active employees. IBM Fellows are encouraged to further enhance their potential for creative achievements and typically work on special projects or research initiatives that lead the company in exciting new directions. 22 | 23 | -------------------------------------------------------------------------------- /src/main/java/example/PersonTitle.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package example; 21 | 22 | import org.apache.uima.jcas.JCas; 23 | import org.apache.uima.jcas.JCasRegistry; 24 | import org.apache.uima.jcas.cas.TOP_Type; 25 | import org.apache.uima.jcas.tcas.Annotation; 26 | 27 | /** 28 | * A Personal Title. Updated by JCasGen Mon May 23 17:48:43 EDT 2005 XML source: 29 | * c:\a\eclipse\301jxe\jedii_examples\descriptors\analysis_engine\NamesAndPersonTitles_TAE.xml 30 | * 31 | * @generated 32 | */ 33 | public class PersonTitle extends Annotation { 34 | /** 35 | * @generated 36 | * @ordered 37 | */ 38 | public final static int typeIndexID = JCasRegistry.register(PersonTitle.class); 39 | 40 | /** 41 | * @generated 42 | * @ordered 43 | */ 44 | public final static int type = typeIndexID; 45 | 46 | /** @generated */ 47 | public int getTypeIndexID() { 48 | return typeIndexID; 49 | } 50 | 51 | /** 52 | * Never called. 
Disable default constructor 53 | * 54 | * @generated 55 | */ 56 | protected PersonTitle() { 57 | } 58 | 59 | /** 60 | * Internal - constructor used by generator 61 | * 62 | * @generated 63 | */ 64 | public PersonTitle(int addr, TOP_Type type) { 65 | super(addr, type); 66 | readObject(); 67 | } 68 | 69 | /** @generated */ 70 | public PersonTitle(JCas jcas) { 71 | super(jcas); 72 | readObject(); 73 | } 74 | 75 | public PersonTitle(JCas jcas, int begin, int end) { 76 | super(jcas); 77 | setBegin(begin); 78 | setEnd(end); 79 | readObject(); 80 | } 81 | 82 | /** 83 | * Write your own initialization here 84 | * 85 | * @generated modifiable 86 | */ 87 | private void readObject() { 88 | } 89 | 90 | // *--------------* 91 | // * Feature: Kind 92 | 93 | /** 94 | * getter for Kind - gets The kind of title - Civilian, Military, or Government. 95 | * 96 | * @generated 97 | */ 98 | public String getKind() { 99 | if (PersonTitle_Type.featOkTst && ((PersonTitle_Type) jcasType).casFeat_Kind == null) 100 | this.jcasType.jcas.throwFeatMissing("Kind", "example.PersonTitle"); 101 | return jcasType.ll_cas.ll_getStringValue(addr, ((PersonTitle_Type) jcasType).casFeatCode_Kind); 102 | } 103 | 104 | /** 105 | * setter for Kind - sets The kind of title - Civilian, Military, or Government. 106 | * 107 | * @generated 108 | */ 109 | public void setKind(String v) { 110 | if (PersonTitle_Type.featOkTst && ((PersonTitle_Type) jcasType).casFeat_Kind == null) 111 | this.jcasType.jcas.throwFeatMissing("Kind", "example.PersonTitle"); 112 | jcasType.ll_cas.ll_setStringValue(addr, ((PersonTitle_Type) jcasType).casFeatCode_Kind, v); 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/RoomNumber.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.uima.tutorial; 21 | 22 | import org.apache.uima.jcas.JCas; 23 | import org.apache.uima.jcas.JCasRegistry; 24 | import org.apache.uima.jcas.cas.TOP_Type; 25 | import org.apache.uima.jcas.tcas.Annotation; 26 | 27 | /** 28 | * Updated by JCasGen Mon Nov 29 15:02:37 EST 2004 XML source: C:/Program 29 | * Files/apache/uima/examples/descriptors/tutorial/ex6/TutorialTypeSystem.xml 30 | * 31 | * @generated 32 | */ 33 | public class RoomNumber extends Annotation { 34 | /** 35 | * @generated 36 | * @ordered 37 | */ 38 | public final static int typeIndexID = JCasRegistry.register(RoomNumber.class); 39 | 40 | /** 41 | * @generated 42 | * @ordered 43 | */ 44 | public final static int type = typeIndexID; 45 | 46 | /** @generated */ 47 | public int getTypeIndexID() { 48 | return typeIndexID; 49 | } 50 | 51 | /** 52 | * Never called. 
Disable default constructor 53 | * 54 | * @generated 55 | */ 56 | protected RoomNumber() { 57 | } 58 | 59 | /** 60 | * Internal - constructor used by generator 61 | * 62 | * @generated 63 | */ 64 | public RoomNumber(int addr, TOP_Type type) { 65 | super(addr, type); 66 | readObject(); 67 | } 68 | 69 | /** @generated */ 70 | public RoomNumber(JCas jcas) { 71 | super(jcas); 72 | readObject(); 73 | } 74 | 75 | public RoomNumber(JCas jcas, int begin, int end) { 76 | super(jcas); 77 | setBegin(begin); 78 | setEnd(end); 79 | readObject(); 80 | } 81 | 82 | /** 83 | * Write your own initialization here 84 | * 85 | * @generated modifiable 86 | */ 87 | private void readObject() { 88 | } 89 | 90 | // *--------------* 91 | // * Feature: building 92 | 93 | /** 94 | * getter for building - gets Building containing this room 95 | * 96 | * @generated 97 | */ 98 | public String getBuilding() { 99 | if (RoomNumber_Type.featOkTst && ((RoomNumber_Type) jcasType).casFeat_building == null) 100 | this.jcasType.jcas.throwFeatMissing("building", "org.apache.uima.tutorial.RoomNumber"); 101 | return jcasType.ll_cas.ll_getStringValue(addr, 102 | ((RoomNumber_Type) jcasType).casFeatCode_building); 103 | } 104 | 105 | /** 106 | * setter for building - sets Building containing this room 107 | * 108 | * @generated 109 | */ 110 | public void setBuilding(String v) { 111 | if (RoomNumber_Type.featOkTst && ((RoomNumber_Type) jcasType).casFeat_building == null) 112 | this.jcasType.jcas.throwFeatMissing("building", "org.apache.uima.tutorial.RoomNumber"); 113 | jcasType.ll_cas.ll_setStringValue(addr, ((RoomNumber_Type) jcasType).casFeatCode_building, v); 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /target/classes/types/SourceDocumentInformation.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | org.apache.uima.examples.SourceDocumentInformation 26 | Defines a type for storing 
information about the original source document from which the current CAS was initialized 27 | 1.0 28 | The Apache Software Foundation 29 | 30 | 31 | org.apache.uima.examples.SourceDocumentInformation 32 | Stores detailed information about the original source document from which the current CAS was initialized. All information (like size) refers to the source document and not to the document in the CAS which may be converted and filtered by a CAS Initializer. For example this information will be written to the Semantic Search index so that the original document contents can be retrieved by queries. 33 | uima.tcas.Annotation 34 | 35 | 36 | uri 37 | URI of document. (For example, file:///MyDirectory/myFile.txt for a simple file or http://incubator.apache.org/uima/index.html for content from a web source.) 38 | uima.cas.String 39 | 40 | 41 | offsetInSource 42 | Byte offset of the start of document content within original source file or other input source. Only used if the CAS document was retrieved from an source where one physical source file contained several conceptual documents. Zero otherwise. 43 | uima.cas.Integer 44 | 45 | 46 | documentSize 47 | Size of original document in bytes before processing by CAS Initializer. Either absolute file size of size within file or other source. 48 | uima.cas.Integer 49 | 50 | 51 | lastSegment 52 | For a CAS that represents a segment of a larger source document, this flag indicates whether this CAS is the final segment of the source document. This is useful for downstream components that want to take some action after having seen all of the segments of a particular source document. 
53 | uima.cas.Boolean 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /src/main/resources/types/SourceDocumentInformation.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | org.apache.uima.examples.SourceDocumentInformation 26 | Defines a type for storing information about the original source document from which the current CAS was initialized 27 | 1.0 28 | The Apache Software Foundation 29 | 30 | 31 | org.apache.uima.examples.SourceDocumentInformation 32 | Stores detailed information about the original source document from which the current CAS was initialized. All information (like size) refers to the source document and not to the document in the CAS which may be converted and filtered by a CAS Initializer. For example this information will be written to the Semantic Search index so that the original document contents can be retrieved by queries. 33 | uima.tcas.Annotation 34 | 35 | 36 | uri 37 | URI of document. (For example, file:///MyDirectory/myFile.txt for a simple file or http://incubator.apache.org/uima/index.html for content from a web source.) 38 | uima.cas.String 39 | 40 | 41 | offsetInSource 42 | Byte offset of the start of document content within original source file or other input source. Only used if the CAS document was retrieved from a source where one physical source file contained several conceptual documents. Zero otherwise. 43 | uima.cas.Integer 44 | 45 | 46 | documentSize 47 | Size of original document in bytes before processing by CAS Initializer. Either absolute file size or size within file or other source. 48 | uima.cas.Integer 49 | 50 | 51 | lastSegment 52 | For a CAS that represents a segment of a larger source document, this flag indicates whether this CAS is the final segment of the source document.
This is useful for downstream components that want to take some action after having seen all of the segments of a particular source document. 53 | uima.cas.Boolean 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/DateTimeAnnot.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.uima.tutorial; 21 | 22 | import org.apache.uima.jcas.JCas; 23 | import org.apache.uima.jcas.JCasRegistry; 24 | import org.apache.uima.jcas.cas.TOP_Type; 25 | import org.apache.uima.jcas.tcas.Annotation; 26 | 27 | /** 28 | * Updated by JCasGen Mon Nov 29 15:02:38 EST 2004 XML source: C:/Program 29 | * Files/apache/uima/examples/descriptors/tutorial/ex6/TutorialTypeSystem.xml 30 | * 31 | * @generated 32 | */ 33 | public class DateTimeAnnot extends Annotation { 34 | /** 35 | * @generated 36 | * @ordered 37 | */ 38 | public final static int typeIndexID = JCasRegistry.register(DateTimeAnnot.class); 39 | 40 | /** 41 | * @generated 42 | * @ordered 43 | */ 44 | public final static int type = typeIndexID; 45 | 46 | /** @generated */ 47 | public int getTypeIndexID() { 48 | return typeIndexID; 49 | } 50 | 51 | /** 52 | * Never called. Disable default constructor 53 | * 54 | * @generated 55 | */ 56 | protected DateTimeAnnot() { 57 | } 58 | 59 | /** 60 | * Internal - constructor used by generator 61 | * 62 | * @generated 63 | */ 64 | public DateTimeAnnot(int addr, TOP_Type type) { 65 | super(addr, type); 66 | readObject(); 67 | } 68 | 69 | /** @generated */ 70 | public DateTimeAnnot(JCas jcas) { 71 | super(jcas); 72 | readObject(); 73 | } 74 | 75 | public DateTimeAnnot(JCas jcas, int begin, int end) { 76 | super(jcas); 77 | setBegin(begin); 78 | setEnd(end); 79 | readObject(); 80 | } 81 | 82 | /** 83 | * Write your own initialization here 84 | * 85 | * @generated modifiable 86 | */ 87 | private void readObject() { 88 | } 89 | 90 | // *--------------* 91 | // * Feature: shortDateString 92 | 93 | /** 94 | * getter for shortDateString - gets 95 | * 96 | * @generated 97 | */ 98 | public String getShortDateString() { 99 | if (DateTimeAnnot_Type.featOkTst 100 | && ((DateTimeAnnot_Type) jcasType).casFeat_shortDateString == null) 101 | this.jcasType.jcas.throwFeatMissing("shortDateString", "org.apache.uima.tutorial.DateTimeAnnot"); 102 | return 
jcasType.ll_cas.ll_getStringValue(addr, 103 | ((DateTimeAnnot_Type) jcasType).casFeatCode_shortDateString); 104 | } 105 | 106 | /** 107 | * setter for shortDateString - sets 108 | * 109 | * @generated 110 | */ 111 | public void setShortDateString(String v) { 112 | if (DateTimeAnnot_Type.featOkTst 113 | && ((DateTimeAnnot_Type) jcasType).casFeat_shortDateString == null) 114 | this.jcasType.jcas.throwFeatMissing("shortDateString", "org.apache.uima.tutorial.DateTimeAnnot"); 115 | jcasType.ll_cas.ll_setStringValue(addr, 116 | ((DateTimeAnnot_Type) jcasType).casFeatCode_shortDateString, v); 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/main/java/example/PersonTitle_Type.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package example; 21 | 22 | import org.apache.uima.cas.Feature; 23 | import org.apache.uima.cas.FeatureStructure; 24 | import org.apache.uima.cas.Type; 25 | import org.apache.uima.cas.impl.CASImpl; 26 | import org.apache.uima.cas.impl.FSGenerator; 27 | import org.apache.uima.cas.impl.FeatureImpl; 28 | import org.apache.uima.cas.impl.TypeImpl; 29 | import org.apache.uima.jcas.JCas; 30 | import org.apache.uima.jcas.JCasRegistry; 31 | import org.apache.uima.jcas.tcas.Annotation_Type; 32 | 33 | /** 34 | * A Personal Title. Updated by JCasGen Mon May 23 17:48:43 EDT 2005 35 | * 36 | * @generated 37 | */ 38 | public class PersonTitle_Type extends Annotation_Type { 39 | /** @generated */ 40 | protected FSGenerator getFSGenerator() { 41 | return fsGenerator; 42 | } 43 | 44 | /** @generated */ 45 | private final FSGenerator fsGenerator = new FSGenerator() { 46 | public FeatureStructure createFS(int addr, CASImpl cas) { 47 | if (instanceOf_Type.useExistingInstance) { 48 | // Return eq fs instance if already created 49 | FeatureStructure fs = instanceOf_Type.jcas.getJfsFromCaddr(addr); 50 | if (null == fs) { 51 | fs = new PersonTitle(addr, instanceOf_Type); 52 | instanceOf_Type.jcas.putJfsFromCaddr(addr, fs); 53 | return fs; 54 | } 55 | return fs; 56 | } else 57 | return new PersonTitle(addr, instanceOf_Type); 58 | } 59 | }; 60 | 61 | /** @generated */ 62 | public final static int typeIndexID = PersonTitle.typeIndexID; 63 | 64 | /** 65 | * @generated 66 | * @modifiable 67 | */ 68 | public final static boolean featOkTst = JCasRegistry.getFeatOkTst("example.PersonTitle"); 69 | 70 | /** @generated */ 71 | final Feature casFeat_Kind; 72 | 73 | /** @generated */ 74 | final int casFeatCode_Kind; 75 | 76 | /** @generated */ 77 | public String getKind(int addr) { 78 | if (featOkTst && casFeat_Kind == null) 79 | this.jcas.throwFeatMissing("Kind", "example.PersonTitle"); 80 | return ll_cas.ll_getStringValue(addr, casFeatCode_Kind); 81 | } 82 | 83 | /** @generated */ 84 
| public void setKind(int addr, String v) { 85 | if (featOkTst && casFeat_Kind == null) 86 | this.jcas.throwFeatMissing("Kind", "example.PersonTitle"); 87 | ll_cas.ll_setStringValue(addr, casFeatCode_Kind, v); 88 | } 89 | 90 | /** 91 | * initialize variables to correspond with Cas Type and Features 92 | * 93 | * @generated 94 | */ 95 | public PersonTitle_Type(JCas jcas, Type casType) { 96 | super(jcas, casType); 97 | casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl) this.casType, getFSGenerator()); 98 | 99 | casFeat_Kind = jcas.getRequiredFeatureDE(casType, "Kind", "example.PersonTitleKind", featOkTst); 100 | casFeatCode_Kind = (null == casFeat_Kind) ? JCas.INVALID_FEATURE_CODE 101 | : ((FeatureImpl) casFeat_Kind).getCode(); 102 | 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/UimaAcronym.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.uima.tutorial; 21 | 22 | import org.apache.uima.jcas.JCas; 23 | import org.apache.uima.jcas.JCasRegistry; 24 | import org.apache.uima.jcas.cas.TOP_Type; 25 | import org.apache.uima.jcas.tcas.Annotation; 26 | 27 | /** 28 | * Updated by JCasGen Mon Nov 29 15:02:38 EST 2004 XML source: C:/Program 29 | * Files/apache/uima/examples/descriptors/tutorial/ex6/TutorialTypeSystem.xml 30 | * 31 | * @generated 32 | */ 33 | public class UimaAcronym extends Annotation { 34 | /** 35 | * @generated 36 | * @ordered 37 | */ 38 | public final static int typeIndexID = JCasRegistry.register(UimaAcronym.class); 39 | 40 | /** 41 | * @generated 42 | * @ordered 43 | */ 44 | public final static int type = typeIndexID; 45 | 46 | /** @generated */ 47 | public int getTypeIndexID() { 48 | return typeIndexID; 49 | } 50 | 51 | /** 52 | * Never called. Disable default constructor 53 | * 54 | * @generated 55 | */ 56 | protected UimaAcronym() { 57 | } 58 | 59 | /** 60 | * Internal - constructor used by generator 61 | * 62 | * @generated 63 | */ 64 | public UimaAcronym(int addr, TOP_Type type) { 65 | super(addr, type); 66 | readObject(); 67 | } 68 | 69 | /** @generated */ 70 | public UimaAcronym(JCas jcas) { 71 | super(jcas); 72 | readObject(); 73 | } 74 | 75 | public UimaAcronym(JCas jcas, int begin, int end) { 76 | super(jcas); 77 | setBegin(begin); 78 | setEnd(end); 79 | readObject(); 80 | } 81 | 82 | /** 83 | * Write your own initialization here 84 | * 85 | * @generated modifiable 86 | */ 87 | private void readObject() { 88 | } 89 | 90 | // *--------------* 91 | // * Feature: expandedForm 92 | 93 | /** 94 | * getter for expandedForm - gets 95 | * 96 | * @generated 97 | */ 98 | public String getExpandedForm() { 99 | if (UimaAcronym_Type.featOkTst && ((UimaAcronym_Type) jcasType).casFeat_expandedForm == null) 100 | this.jcasType.jcas.throwFeatMissing("expandedForm", "org.apache.uima.tutorial.UimaAcronym"); 101 | return jcasType.ll_cas.ll_getStringValue(addr, 102 
| ((UimaAcronym_Type) jcasType).casFeatCode_expandedForm); 103 | } 104 | 105 | /** 106 | * setter for expandedForm - sets 107 | * 108 | * @generated 109 | */ 110 | public void setExpandedForm(String v) { 111 | if (UimaAcronym_Type.featOkTst && ((UimaAcronym_Type) jcasType).casFeat_expandedForm == null) 112 | this.jcasType.jcas.throwFeatMissing("expandedForm", "org.apache.uima.tutorial.UimaAcronym"); 113 | jcasType.ll_cas.ll_setStringValue(addr, ((UimaAcronym_Type) jcasType).casFeatCode_expandedForm, 114 | v); 115 | } 116 | 117 | /** Custom constructor taking all parameters */ 118 | public UimaAcronym(JCas jcas, int start, int end, String expandedForm) { 119 | super(jcas, start, end); 120 | setExpandedForm(expandedForm); 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/examples/tokenizer/SimpleTokenAndSentenceAnnotator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.uima.examples.tokenizer; 21 | 22 | import java.text.BreakIterator; 23 | import java.text.ParsePosition; 24 | import java.util.Locale; 25 | 26 | import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; 27 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 28 | import org.apache.uima.jcas.JCas; 29 | import org.apache.uima.jcas.tcas.Annotation; 30 | 31 | /** 32 | * An example annotator that annotates Tokens and Sentences. 33 | */ 34 | public class SimpleTokenAndSentenceAnnotator extends JCasAnnotator_ImplBase { 35 | 36 | static abstract class Maker { 37 | abstract Annotation newAnnotation(JCas jcas, int start, int end); 38 | } 39 | 40 | JCas jcas; 41 | 42 | String input; 43 | 44 | ParsePosition pp = new ParsePosition(0); 45 | 46 | // **************************************** 47 | // * Static vars holding break iterators 48 | // **************************************** 49 | static final BreakIterator sentenceBreak = BreakIterator.getSentenceInstance(Locale.US); 50 | 51 | static final BreakIterator wordBreak = BreakIterator.getWordInstance(Locale.US); 52 | 53 | // ********************************************* 54 | // * function pointers for new instances * 55 | // ********************************************* 56 | static final Maker sentenceAnnotationMaker = new Maker() { 57 | Annotation newAnnotation(JCas jcas, int start, int end) { 58 | return new Sentence(jcas, start, end); 59 | } 60 | }; 61 | 62 | static final Maker tokenAnnotationMaker = new Maker() { 63 | Annotation newAnnotation(JCas jcas, int start, int end) { 64 | return new Token(jcas, start, end); 65 | } 66 | }; 67 | 68 | // ************************************************************* 69 | // * process * 70 | // ************************************************************* 71 | public void process(JCas aJCas) throws AnalysisEngineProcessException { 72 | jcas = aJCas; 73 | input = jcas.getDocumentText(); 74 | 75 | // Create Annotations 76 | 
makeAnnotations(sentenceAnnotationMaker, sentenceBreak); 77 | makeAnnotations(tokenAnnotationMaker, wordBreak); 78 | } 79 | 80 | // ************************************************************* 81 | // * Helper Methods * 82 | // ************************************************************* 83 | void makeAnnotations(Maker m, BreakIterator b) { 84 | b.setText(input); 85 | for (int end = b.next(), start = b.first(); end != BreakIterator.DONE; start = end, end = b 86 | .next()) { 87 | // eliminate all-whitespace tokens 88 | boolean isWhitespace = true; 89 | for (int i = start; i < end; i++) { 90 | if (!Character.isWhitespace(input.charAt(i))) { 91 | isWhitespace = false; 92 | break; 93 | } 94 | } 95 | if (!isWhitespace) { 96 | m.newAnnotation(jcas, start, end).addToIndexes(); 97 | } 98 | } 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/RoomNumber_Type.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.uima.tutorial; 21 | 22 | import org.apache.uima.cas.Feature; 23 | import org.apache.uima.cas.FeatureStructure; 24 | import org.apache.uima.cas.Type; 25 | import org.apache.uima.cas.impl.CASImpl; 26 | import org.apache.uima.cas.impl.FSGenerator; 27 | import org.apache.uima.cas.impl.FeatureImpl; 28 | import org.apache.uima.cas.impl.TypeImpl; 29 | import org.apache.uima.jcas.JCas; 30 | import org.apache.uima.jcas.JCasRegistry; 31 | import org.apache.uima.jcas.tcas.Annotation_Type; 32 | 33 | /** 34 | * Updated by JCasGen Mon Nov 29 15:02:37 EST 2004 35 | * 36 | * @generated 37 | */ 38 | public class RoomNumber_Type extends Annotation_Type { 39 | /** @generated */ 40 | protected FSGenerator getFSGenerator() { 41 | return fsGenerator; 42 | } 43 | 44 | /** @generated */ 45 | private final FSGenerator fsGenerator = new FSGenerator() { 46 | public FeatureStructure createFS(int addr, CASImpl cas) { 47 | if (instanceOf_Type.useExistingInstance) { 48 | // Return eq fs instance if already created 49 | FeatureStructure fs = instanceOf_Type.jcas.getJfsFromCaddr(addr); 50 | if (null == fs) { 51 | fs = new RoomNumber(addr, instanceOf_Type); 52 | instanceOf_Type.jcas.putJfsFromCaddr(addr, fs); 53 | return fs; 54 | } 55 | return fs; 56 | } else 57 | return new RoomNumber(addr, instanceOf_Type); 58 | } 59 | }; 60 | 61 | /** @generated */ 62 | public final static int typeIndexID = RoomNumber.typeIndexID; 63 | 64 | /** 65 | * @generated 66 | * @modifiable 67 | */ 68 | public final static boolean featOkTst = JCasRegistry.getFeatOkTst("org.apache.uima.tutorial.RoomNumber"); 69 | 70 | /** @generated */ 71 | final Feature casFeat_building; 72 | 73 | /** @generated */ 74 | final int casFeatCode_building; 75 | 76 | /** @generated */ 77 | public String getBuilding(int addr) { 78 | if (featOkTst && casFeat_building == null) 79 | this.jcas.throwFeatMissing("building", "org.apache.uima.tutorial.RoomNumber"); 80 | return ll_cas.ll_getStringValue(addr, 
casFeatCode_building); 81 | } 82 | 83 | /** @generated */ 84 | public void setBuilding(int addr, String v) { 85 | if (featOkTst && casFeat_building == null) 86 | this.jcas.throwFeatMissing("building", "org.apache.uima.tutorial.RoomNumber"); 87 | ll_cas.ll_setStringValue(addr, casFeatCode_building, v); 88 | } 89 | 90 | /** 91 | * initialize variables to correspond with Cas Type and Features 92 | * 93 | * @generated 94 | */ 95 | public RoomNumber_Type(JCas jcas, Type casType) { 96 | super(jcas, casType); 97 | casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl) this.casType, getFSGenerator()); 98 | 99 | casFeat_building = jcas.getRequiredFeatureDE(casType, "building", "uima.cas.String", featOkTst); 100 | casFeatCode_building = (null == casFeat_building) ? JCas.INVALID_FEATURE_CODE 101 | : ((FeatureImpl) casFeat_building).getCode(); 102 | 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/UimaAcronym_Type.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.uima.tutorial; 21 | 22 | import org.apache.uima.cas.Feature; 23 | import org.apache.uima.cas.FeatureStructure; 24 | import org.apache.uima.cas.Type; 25 | import org.apache.uima.cas.impl.CASImpl; 26 | import org.apache.uima.cas.impl.FSGenerator; 27 | import org.apache.uima.cas.impl.FeatureImpl; 28 | import org.apache.uima.cas.impl.TypeImpl; 29 | import org.apache.uima.jcas.JCas; 30 | import org.apache.uima.jcas.JCasRegistry; 31 | import org.apache.uima.jcas.tcas.Annotation_Type; 32 | 33 | /** 34 | * Updated by JCasGen Mon Nov 29 15:02:38 EST 2004 35 | * 36 | * @generated 37 | */ 38 | public class UimaAcronym_Type extends Annotation_Type { 39 | /** @generated */ 40 | protected FSGenerator getFSGenerator() { 41 | return fsGenerator; 42 | } 43 | 44 | /** @generated */ 45 | private final FSGenerator fsGenerator = new FSGenerator() { 46 | public FeatureStructure createFS(int addr, CASImpl cas) { 47 | if (instanceOf_Type.useExistingInstance) { 48 | // Return eq fs instance if already created 49 | FeatureStructure fs = instanceOf_Type.jcas.getJfsFromCaddr(addr); 50 | if (null == fs) { 51 | fs = new UimaAcronym(addr, instanceOf_Type); 52 | instanceOf_Type.jcas.putJfsFromCaddr(addr, fs); 53 | return fs; 54 | } 55 | return fs; 56 | } else 57 | return new UimaAcronym(addr, instanceOf_Type); 58 | } 59 | }; 60 | 61 | /** @generated */ 62 | public final static int typeIndexID = UimaAcronym.typeIndexID; 63 | 64 | /** 65 | * @generated 66 | * @modifiable 67 | */ 68 | public final static boolean featOkTst = JCasRegistry.getFeatOkTst("org.apache.uima.tutorial.UimaAcronym"); 69 | 70 | /** @generated */ 71 | final Feature casFeat_expandedForm; 72 | 73 | /** @generated */ 74 | final int casFeatCode_expandedForm; 75 | 76 | /** @generated */ 77 | public String getExpandedForm(int addr) { 78 | if (featOkTst && casFeat_expandedForm == null) 79 | this.jcas.throwFeatMissing("expandedForm", "org.apache.uima.tutorial.UimaAcronym"); 80 | return 
ll_cas.ll_getStringValue(addr, casFeatCode_expandedForm); 81 | } 82 | 83 | /** @generated */ 84 | public void setExpandedForm(int addr, String v) { 85 | if (featOkTst && casFeat_expandedForm == null) 86 | this.jcas.throwFeatMissing("expandedForm", "org.apache.uima.tutorial.UimaAcronym"); 87 | ll_cas.ll_setStringValue(addr, casFeatCode_expandedForm, v); 88 | } 89 | 90 | /** 91 | * initialize variables to correspond with Cas Type and Features 92 | * 93 | * @generated 94 | */ 95 | public UimaAcronym_Type(JCas jcas, Type casType) { 96 | super(jcas, casType); 97 | casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl) this.casType, getFSGenerator()); 98 | 99 | casFeat_expandedForm = jcas.getRequiredFeatureDE(casType, "expandedForm", "uima.cas.String", 100 | featOkTst); 101 | casFeatCode_expandedForm = (null == casFeat_expandedForm) ? JCas.INVALID_FEATURE_CODE 102 | : ((FeatureImpl) casFeat_expandedForm).getCode(); 103 | 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/DateTimeAnnot_Type.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. 
See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.uima.tutorial; 21 | 22 | import org.apache.uima.cas.Feature; 23 | import org.apache.uima.cas.FeatureStructure; 24 | import org.apache.uima.cas.Type; 25 | import org.apache.uima.cas.impl.CASImpl; 26 | import org.apache.uima.cas.impl.FSGenerator; 27 | import org.apache.uima.cas.impl.FeatureImpl; 28 | import org.apache.uima.cas.impl.TypeImpl; 29 | import org.apache.uima.jcas.JCas; 30 | import org.apache.uima.jcas.JCasRegistry; 31 | import org.apache.uima.jcas.tcas.Annotation_Type; 32 | 33 | /** 34 | * Updated by JCasGen Mon Nov 29 15:02:38 EST 2004 35 | * 36 | * @generated 37 | */ 38 | public class DateTimeAnnot_Type extends Annotation_Type { 39 | /** @generated */ 40 | protected FSGenerator getFSGenerator() { 41 | return fsGenerator; 42 | } 43 | 44 | /** @generated */ 45 | private final FSGenerator fsGenerator = new FSGenerator() { 46 | public FeatureStructure createFS(int addr, CASImpl cas) { 47 | if (instanceOf_Type.useExistingInstance) { 48 | // Return eq fs instance if already created 49 | FeatureStructure fs = instanceOf_Type.jcas.getJfsFromCaddr(addr); 50 | if (null == fs) { 51 | fs = new DateTimeAnnot(addr, instanceOf_Type); 52 | instanceOf_Type.jcas.putJfsFromCaddr(addr, fs); 53 | return fs; 54 | } 55 | return fs; 56 | } else 57 | return new DateTimeAnnot(addr, instanceOf_Type); 58 | } 59 | }; 60 | 61 | /** @generated */ 62 | public final static int typeIndexID = DateTimeAnnot.typeIndexID; 63 | 64 | /** 65 | * @generated 66 | * @modifiable 67 | */ 68 | public final static boolean featOkTst = JCasRegistry.getFeatOkTst("org.apache.uima.tutorial.DateTimeAnnot"); 69 | 70 | /** @generated */ 71 | final Feature casFeat_shortDateString; 72 | 73 | /** @generated */ 74 | final int casFeatCode_shortDateString; 75 | 76 | /** @generated */ 77 | public String getShortDateString(int addr) { 78 | if (featOkTst && 
casFeat_shortDateString == null) 79 | this.jcas.throwFeatMissing("shortDateString", "org.apache.uima.tutorial.DateTimeAnnot"); 80 | return ll_cas.ll_getStringValue(addr, casFeatCode_shortDateString); 81 | } 82 | 83 | /** @generated */ 84 | public void setShortDateString(int addr, String v) { 85 | if (featOkTst && casFeat_shortDateString == null) 86 | this.jcas.throwFeatMissing("shortDateString", "org.apache.uima.tutorial.DateTimeAnnot"); 87 | ll_cas.ll_setStringValue(addr, casFeatCode_shortDateString, v); 88 | } 89 | 90 | /** 91 | * initialize variables to correspond with Cas Type and Features 92 | * 93 | * @generated 94 | */ 95 | public DateTimeAnnot_Type(JCas jcas, Type casType) { 96 | super(jcas, casType); 97 | casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl) this.casType, getFSGenerator()); 98 | 99 | casFeat_shortDateString = jcas.getRequiredFeatureDE(casType, "shortDateString", 100 | "uima.cas.String", featOkTst); 101 | casFeatCode_shortDateString = (null == casFeat_shortDateString) ? JCas.INVALID_FEATURE_CODE 102 | : ((FeatureImpl) casFeat_shortDateString).getCode(); 103 | 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /data/xml/IBM_LifeSciences.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | IBM announces $100 Million investment in Life Sciences 26 | 16 August 2000 27 | "Life sciences is one of the emerging markets at the heart of IBM's growth strategy," said John M. Thompson, IBM senior vice president & group executive, Software. "This investment is the first of a number of steps we will be taking to advance IBM's life sciences initiatives." In his role as newly appointed IBM Corporation vice chairman, effective September 1, Mr. Thompson will be responsible for integrating and accelerating IBM's efforts to exploit life sciences and other emerging growth areas. 
28 | 29 | IBM estimates the market for IT solutions for life sciences will skyrocket from $3.5 billion today to more than $9 billion by 2003. Driving demand is the explosive growth in genomic, proteomic and pharmaceutical research. For example, the Human Genome Database is approximately three terabytes of data, or the equivalent of 150 million pages of information. The volume of life sciences data is doubling every six months. 30 | 31 | "All of this genetic data is worthless without the information technology that can help scientists manage and analyze it to unlock the pathways that will lead to new cures for many of today's diseases," said Dr. Caroline Kovac, vice president of IBM's new Life Sciences unit. "IBM can help speed this process by enabling more efficient interpretation of data and sharing of knowledge. The potential for change based on innovation in life sciences is bigger than the change caused by the digital circuit." 32 | 33 | Among the life sciences initiatives already underway at IBM are: 34 | - DiscoveryLink* -- For the first time, researchers using this combination of innovative middleware and integration services can join together information from many sources to solve complex medical research problems. DiscoveryLink creates a "virtual database" that permits data to be accessed and extracted from multiple data sources used in research and development projects. This IT solution can dramatically improve product cycle time and lower development costs for pharmaceutical, biotechnology and agri-science companies. 35 | 36 | - Blue Gene* - IBM is building a supercomputer 100 times faster than any available today designed to advance understanding of the mechanisms behind protein folding through large-scale biomolecular simulation. In December, IBM committed $100 million to this five-year research project to advance the state-of-the-art in supercomputing for biological applications. 
 37 | - Bio-Dictionary* -- IBM has compiled a protein dictionary containing some 30 million protein "words" designed to accelerate the understanding of protein shapes and functions. Bio-Dictionaries for selected genomes, as well as bioinformatics algorithms for pattern discovery and other relevant applications, are available to scientists and researchers for noncommercial use through a website dedicated to life sciences content at http://www.research.ibm.com/compsci/compbio/. 38 | 39 | 
* Indicates trademark or registered trademark of IBM Corporation.
40 |
-------------------------------------------------------------------------------- /data/xml/UIMASummerSchool2003.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | UIMA Summer School 25 | 1 August 2003 26 | 27 | August 26, 2003 28 | UIMA 101 - The New UIMA Introduction 29 | (Hands-on Tutorial) 30 | 9:00AM-5:00PM in HAW GN-K35 31 | 32 | August 28, 2003 33 | FROST Tutorial 34 | 9:00AM-5:00PM in HAW GN-K35 35 | 36 | September 15, 2003 37 | UIMA 201: UIMA Advanced Topics 38 | (Hands-on Tutorial) 39 | 9:00AM-5:00PM in HAW 1S-F53 40 | 41 | September 17, 2003 42 | The UIMA System Integration Test and Hardening Service 43 | The "SITH" 44 | 3:00PM-4:30PM in HAW GN-K35 45 | 46 | 47 | 48 | UIMA Summer School Tutorial and Presentation Details 49 | UIMA 101: The new UIMA tutorial 50 | Tuesday August 26 9:00AM - 4:30PM in GN-K35 51 | 52 | UIMA 101 is a hands-on programming tutorial. 53 | 54 | UIMA 101 is intended for people who want a first introductory course to UIMA or for people who would like a refresher. 55 | 56 | The tutorial covers the same concepts in the first UIMA tutorial given in 3Q 2002 except for some key updates: 57 | 58 | 1) It uses a new interface to the CAS that makes it more natural to access and update CAS feature structures using ordinary Java objects (i.e., the JCAS) and 59 | 2) It uses updated TAE interfaces that give the application developer more control over managing multiple CASs. 60 | 61 | Please NOTE expert users of UIMA can skip this one and should consider attending the Advanced Topics tutorial. 62 | 63 | Prerequisites for the UIMA 101 Tutorial 64 | 1) Java Programming 65 | 2) Some experience with Eclipse IDE helpful 66 | 67 | FROST Tutorial 68 | August 28 9:00AM - 5:00PM in GN-K35 69 | 70 | Visitors from the FROST team will be here to talk to us about FROST. 
71 | 72 | UIMA 201: The UIMA Advanced Topics Tutorial 73 | September 15: 9:00AM - 5:30PM in Hawthorne 1S-F53 74 | 75 | UIMA 201 will introduce some new UIMA concepts and walk the student through hands-on examples. 76 | 77 | The advanced topics tutorial is designed for people who have some experience with UIMA and want 78 | to use new capabilities of UIMA 1.0 to address one or more of the following 79 | Advanced Topics: 80 | 81 | 1) Collection Processing and Collection Processing Engines (CPEs) 82 | 2) Multi-Threading and CAS Pooling 83 | 3) Using the UIMA adapter framework to integrate network TAEs with Java TAEs 84 | 4) A Semantic Search Application that brings it all together 85 | 86 | Prerequisites for UIMA 201 87 | 1) UIMA 101 Tutorial OR Extensive UIMA Experience 88 | 89 | The UIMA Integration Test bed Service (The "SITH") 90 | September 17 3:00PM - 4:30PM in HAW GN-K35 91 | 92 | We have developed the first version of the UIMA Integration Test bed service. 93 | 94 | This service is being developed to help test, evaluate, certify and publish UIMA compliant components. 95 | 96 | In this talk we will explain the service and what it is intended to provide the UIMA community. We will address the following topics: 97 | 98 | 1. SITH Services 99 | 2. How to submit components and what to expect in return 100 | 3. Overview of the test bed implementation using Collection Processing UIMA and Juru. 101 | 4. Next Steps for the SITH 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/examples/casMultiplier/CasMultiplierExampleApplication.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.uima.examples.casMultiplier; 21 | 22 | import java.io.File; 23 | import java.io.PrintStream; 24 | 25 | import org.apache.uima.UIMAFramework; 26 | import org.apache.uima.analysis_engine.AnalysisEngine; 27 | import org.apache.uima.analysis_engine.CasIterator; 28 | import org.apache.uima.cas.CAS; 29 | import org.apache.uima.examples.PrintAnnotations; 30 | import org.apache.uima.resource.ResourceSpecifier; 31 | import org.apache.uima.util.FileUtils; 32 | import org.apache.uima.util.XMLInputSource; 33 | 34 | /** 35 | * An example application that shows how to interact with a CasMultiplier. A CasMultiplier is a type 36 | * of Analysis Engine that outputs new CASes. One use of a CasMultiplier is to divide a large CAS 37 | * into smaller pieces - a CasMultiplier that does this is called a "Segmenter". 38 | *

39 | * This program takes two arguments - 40 | *

    41 | *
  • The path to the Analysis Engine Descriptor for the CasMultiplier to run (such as 42 | * descriptors/cas_multiplier/SimpleTextSegmenter.xml or 43 | * descriptors/cas_multiplier/SegmenterAndTokenizerAE.xml)
  • 44 | *
  • The file name of a text document to analyze (to see the effect of segmentation, choose a 45 | * document larger than 100k characters, which is the default segment size produced by the 46 | * SimpleTextSegmenter.
  • 47 | *
48 | */ 49 | public class CasMultiplierExampleApplication { 50 | static PrintStream outputStream; 51 | 52 | /** 53 | * Main program. 54 | * 55 | * @param args 56 | * Command-line arguments - see class description 57 | */ 58 | public static void main(String[] args) { 59 | try { 60 | // get Resource Specifier from XML file 61 | XMLInputSource in = new XMLInputSource(args[0]); 62 | ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in); 63 | 64 | // create AnalysisEngine 65 | AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier); 66 | 67 | // read input text file 68 | File textFile = new File(args[1]); 69 | String document = FileUtils.file2String(textFile, "UTF-8"); 70 | 71 | // create a new CAS and set the document text 72 | CAS initialCas = ae.newCAS(); 73 | initialCas.setDocumentText(document); 74 | 75 | // pass the CAS to the AnalysisEngine and get back 76 | // a CasIterator for stepping over the output CASes that are produced. 77 | CasIterator casIterator = ae.processAndOutputNewCASes(initialCas); 78 | while (casIterator.hasNext()) { 79 | CAS outCas = casIterator.next(); 80 | 81 | // dump the document text and annotations for this segment 82 | System.out.println("********* NEW SEGMENT *********"); 83 | System.out.println(outCas.getDocumentText()); 84 | PrintAnnotations.printAnnotations(outCas, System.out); 85 | 86 | // release the CAS (important) 87 | outCas.release(); 88 | } 89 | 90 | // If there's a CAS Consumer inside this aggregate and we want 91 | // it's collectionProcessComplete method to be called, we need to 92 | // call it ourselves. If run inside a CPE this would get called 93 | // automatically. 
94 | ae.collectionProcessComplete(); 95 | } catch (Exception e) { 96 | e.printStackTrace(); 97 | } 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/examples/SofaExampleAnnotator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.uima.examples; 21 | 22 | import java.util.Arrays; 23 | import java.util.StringTokenizer; 24 | 25 | import org.apache.uima.analysis_component.CasAnnotator_ImplBase; 26 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 27 | import org.apache.uima.cas.CAS; 28 | import org.apache.uima.cas.Feature; 29 | import org.apache.uima.cas.Type; 30 | import org.apache.uima.cas.text.AnnotationFS; 31 | 32 | /** 33 | * A simple multiple subject of analysis (multi-Sofa) example annotator Expects an English text Sofa 34 | * as input Creates a German text Sofa as output 35 | * 36 | * This annotator has no configuration parameters, and requires no initialization method 37 | */ 38 | 39 | public class SofaExampleAnnotator extends CasAnnotator_ImplBase { 40 | public void process(CAS aCas) throws AnalysisEngineProcessException { 41 | CAS englishView, germanView; 42 | 43 | // get the CAS view for the English document 44 | englishView = aCas.getView("EnglishDocument"); 45 | 46 | // Create the German text Sofa and open its view 47 | germanView = aCas.createView("GermanDocument"); 48 | 49 | // Get some necessary Type System constants 50 | Type annot = englishView.getAnnotationType(); 51 | Type cross = englishView.getTypeSystem().getType("sofa.test.CrossAnnotation"); 52 | Feature other = cross.getFeatureByBaseName("otherAnnotation"); 53 | 54 | // Get the English text 55 | String engText = englishView.getDocumentText(); 56 | 57 | // Setup for translated text 58 | int engEnd = 0; 59 | int germBegin = 0; 60 | int germEnd = 0; 61 | StringBuffer translation = new StringBuffer(); 62 | 63 | // Parse the English text 64 | StringTokenizer st = new StringTokenizer(engText); 65 | while (st.hasMoreTokens()) { 66 | String thisTok = st.nextToken(); 67 | int engBegin = engText.indexOf(thisTok, engEnd); 68 | engEnd = engBegin + thisTok.length(); 69 | 70 | // Create token annotations on English text 71 | AnnotationFS engAnnot = 
englishView.createAnnotation(annot, engBegin, engEnd); 72 | englishView.addFsToIndexes(engAnnot); 73 | 74 | // Simple word-by-word translation 75 | String germWord = translate(thisTok); 76 | 77 | // Accumulate the translated text 78 | if (germBegin > 0) { 79 | translation.append(' '); 80 | germBegin += 1; 81 | } 82 | translation.append(germWord); 83 | 84 | // Create token annotations on German text 85 | germEnd = germBegin + germWord.length(); 86 | AnnotationFS germAnnot = germanView.createAnnotation(cross, germBegin, germEnd); 87 | germanView.addFsToIndexes(germAnnot); 88 | 89 | // add link to English text 90 | germAnnot.setFeatureValue(other, engAnnot); 91 | germBegin = germEnd; 92 | } 93 | 94 | // Finally, set the output tranlation Sofa data 95 | germanView.setDocumentText(translation.toString()); 96 | 97 | } 98 | 99 | static char wThis[] = { 't', 'h', 'i', 's' }; 100 | 101 | static char wBeer[] = { 'b', 'e', 'e', 'r' }; 102 | 103 | static char wIs[] = { 'i', 's' }; 104 | 105 | private String translate(String word) { 106 | String lword = word.toLowerCase(); 107 | if (Arrays.equals(wThis, lword.toCharArray())) 108 | return "das"; 109 | if (Arrays.equals(wBeer, lword.toCharArray())) 110 | return "bier"; 111 | if (Arrays.equals(wIs, lword.toCharArray())) 112 | return "ist"; 113 | return "gut"; 114 | } 115 | 116 | } 117 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/examples/SofaExampleApplication.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.uima.examples; 21 | 22 | import java.io.IOException; 23 | 24 | import org.apache.uima.UIMAException; 25 | import org.apache.uima.UIMAFramework; 26 | import org.apache.uima.analysis_engine.AnalysisEngine; 27 | import org.apache.uima.analysis_engine.AnalysisEngineDescription; 28 | import org.apache.uima.cas.CAS; 29 | import org.apache.uima.cas.FSIndex; 30 | import org.apache.uima.cas.FSIterator; 31 | import org.apache.uima.cas.Feature; 32 | import org.apache.uima.cas.Type; 33 | import org.apache.uima.cas.text.AnnotationFS; 34 | import org.apache.uima.util.XMLInputSource; 35 | 36 | /** 37 | * A simple Multiple Subject of Analysis (multi-Sofa) test application. Creates a text Sofa with 38 | * English text, calls an annotator that creates a text Sofa with German text, then dumps all 39 | * annotations found in both Sofas. 40 | * 41 | * The application takes no arguments. 
42 | */ 43 | public class SofaExampleApplication { 44 | /** 45 | * Main program 46 | * 47 | */ 48 | public static void main(String[] args) throws UIMAException, IOException { 49 | // parse AnalysisEngine descriptor 50 | XMLInputSource input = new XMLInputSource( 51 | "descriptors/analysis_engine/SofaExampleAnnotator.xml"); 52 | AnalysisEngineDescription desc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription( 53 | input); 54 | 55 | // create AnalysisEngine 56 | AnalysisEngine seAnnotator = UIMAFramework.produceAnalysisEngine(desc); 57 | 58 | // create CAS 59 | CAS cas = seAnnotator.newCAS(); 60 | 61 | // Create the English document Sofa 62 | CAS englishView = cas.createView("EnglishDocument"); 63 | englishView.setDocumentText("this beer is good"); 64 | 65 | // call a CAS Analysis Engine that "translates" the English document 66 | // and puts the translation into a German Sofa 67 | seAnnotator.process(cas); 68 | 69 | // get annotation iterator for the English CAS view 70 | FSIndex anIndex = englishView.getAnnotationIndex(); 71 | FSIterator anIter = anIndex.iterator(); 72 | 73 | // and print out all annotations found 74 | System.out.println("---Printing all annotations for English Sofa---"); 75 | while (anIter.isValid()) { 76 | AnnotationFS annot = (AnnotationFS) anIter.get(); 77 | System.out.println(" " + annot.getType().getName() + ": " + annot.getCoveredText()); 78 | anIter.moveToNext(); 79 | } 80 | 81 | // now try to get the CAS view for the German Sofa 82 | System.out.println(); 83 | CAS germanView = cas.getView("GermanDocument"); 84 | 85 | // and annotator iterator for the German CAS View 86 | anIndex = germanView.getAnnotationIndex(); 87 | anIter = anIndex.iterator(); 88 | Type cross = germanView.getTypeSystem().getType("sofa.test.CrossAnnotation"); 89 | Feature other = cross.getFeatureByBaseName("otherAnnotation"); 90 | 91 | // print out all annotations for the German Sofa 92 | System.out.println("---Printing all annotations for German Sofa---"); 
93 | while (anIter.isValid()) { 94 | AnnotationFS annot = (AnnotationFS) anIter.get(); 95 | System.out.println(" " + annot.getType().getName() + ": " + annot.getCoveredText()); 96 | if (annot.getType() == cross) { 97 | AnnotationFS crossAnnot = (AnnotationFS) annot.getFeatureValue(other); 98 | System.out.println(" other annotation feature: " + crossAnnot.getCoveredText()); 99 | } 100 | anIter.moveToNext(); 101 | } 102 | 103 | // Clean up 104 | seAnnotator.destroy(); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/ex4/MeetingAnnotator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.uima.tutorial.ex4; 21 | 22 | import java.util.Iterator; 23 | 24 | import org.apache.uima.UimaContext; 25 | import org.apache.uima.analysis_component.AnalysisComponent; 26 | import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; 27 | import org.apache.uima.cas.FSIndex; 28 | import org.apache.uima.jcas.JCas; 29 | import org.apache.uima.resource.ResourceInitializationException; 30 | import org.apache.uima.tutorial.DateAnnot; 31 | import org.apache.uima.tutorial.Meeting; 32 | import org.apache.uima.tutorial.RoomNumber; 33 | import org.apache.uima.tutorial.TimeAnnot; 34 | 35 | /** 36 | * Example annotator that detects meetings from the co-occurrence of a RoomNumber, a Date, and two 37 | * Times (start and end), within a specified "window" size. 38 | */ 39 | public class MeetingAnnotator extends JCasAnnotator_ImplBase { 40 | /** 41 | * Size in characters of window within which a RoomNumber, a Date, and two Times must occur in 42 | * order for a meeting annotation to be created. 43 | */ 44 | private int mWindowSize; 45 | 46 | /** 47 | * @see AnalysisComponent#initialize(UimaContext) 48 | */ 49 | public void initialize(UimaContext aContext) throws ResourceInitializationException { 50 | super.initialize(aContext); 51 | // Get config. 
parameter value 52 | mWindowSize = ((Integer) aContext.getConfigParameterValue("WindowSize")).intValue(); 53 | } 54 | 55 | /** 56 | * @see JCasAnnotator_ImplBase#process(JCas) 57 | */ 58 | public void process(JCas aJCas) { 59 | // get annotation indexes 60 | FSIndex roomNumberIndex = aJCas.getAnnotationIndex(RoomNumber.type); 61 | FSIndex dateIndex = aJCas.getAnnotationIndex(DateAnnot.type); 62 | FSIndex timeIndex = aJCas.getAnnotationIndex(TimeAnnot.type); 63 | 64 | // store end position of last meeting we identified, to prevent multiple 65 | // annotations over same span 66 | int lastMeetingEnd = -1; 67 | 68 | // iterate over all combinations 69 | Iterator roomNumberIter = roomNumberIndex.iterator(); 70 | while (roomNumberIter.hasNext()) { 71 | RoomNumber room = (RoomNumber) roomNumberIter.next(); 72 | 73 | Iterator dateIter = dateIndex.iterator(); 74 | while (dateIter.hasNext()) { 75 | DateAnnot date = (DateAnnot) dateIter.next(); 76 | 77 | Iterator time1Iter = timeIndex.iterator(); 78 | while (time1Iter.hasNext()) { 79 | TimeAnnot time1 = (TimeAnnot) time1Iter.next(); 80 | 81 | Iterator time2Iter = timeIndex.iterator(); 82 | while (time2Iter.hasNext()) { 83 | TimeAnnot time2 = (TimeAnnot) time2Iter.next(); 84 | 85 | // times must be different annotations 86 | if (time1 != time2) { 87 | // compute the begin and end of the span 88 | int minBegin = Math.min(Math.min(time1.getBegin(), time2.getBegin()), Math.min(date 89 | .getBegin(), room.getBegin())); 90 | int maxEnd = Math.max(Math.max(time1.getEnd(), time2.getEnd()), Math.max(date 91 | .getEnd(), room.getEnd())); 92 | 93 | // span must be smaller than the window size? 
94 | if (maxEnd - minBegin < mWindowSize) { 95 | // span must not overlap the last annotation we made 96 | if (minBegin > lastMeetingEnd) { 97 | // annotate 98 | Meeting mtg = new Meeting(aJCas, minBegin, maxEnd, room, date, time1, time2); 99 | mtg.addToIndexes(); 100 | lastMeetingEnd = maxEnd; 101 | } 102 | } 103 | } 104 | } 105 | } 106 | } 107 | } 108 | } 109 | 110 | } 111 | -------------------------------------------------------------------------------- /data/xml/New_IBM_Fellows.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | IBM Names Five Fellows, Company's Highest Techinical Honor 26 | 05 June 2002 27 | 28 | IBM today elevated five employees to the title of IBM Fellow -- its most prestigious technical honor. The company also presented more than $2.8 million in cash awards to employees whose technical innovation have yielded exceptional value to the company and its customers. 29 | 30 | IBM conferred the accolades and awards at its 2003 Corporate Technical Recognition Event (CTRE) in Scottsdale, Ariz. CTRE is a 40-year tradition at IBM, established to recognize exceptional technical employees and reward them for extraordinary achievements and contributions to the company's technology leadership. 31 | 32 | "Our technical employees are among the best and brightest innovators in the world. They share a passion for excellence that defines their work and permeates the products and services IBM delivers to its customers," said Nick Donofrio, senior vice president, technology and manufacturing for IBM. "CTRE provides the means for us to honor those who have distinguished themselves as exceptional leaders among their peers." 33 | 34 | Among the special honorees at the 2003 CTRE are five employees who earned the coveted distinction of IBM Fellow: 35 | 36 | 37 | - Grady Booch, chief scientist of Rational Software, IBM Software Group. 
Recognized internationally for his innovative work on software architecture, modeling, and software engineering process. Mr. Booch is one of the original authors of the Unified Modeling Language (UML), the industry-standard language of blueprints for software-intensive systems. 38 | 39 | - Dr. Donald Chamberlin, researcher, IBM Almaden Research Center. An expert in relational database languages, Dr. Chamberlin is co- inventor of SQL, the language that energized the relational database market. He has also influenced the creation of XQuery, one of a new generation of database query languages covering structured, semi-structured and unstructured data. 40 | 41 | - Dr. George Galambos, chief technology officer, IBM Global Services (IGS) in Canada; the first Fellow from Canada. Dr. Galambos specializes in high-performance, high availability designs, operational effectiveness, and risk assessment/mitigation, focusing on systems engineering and architecture reuse that enhances efficiency and stability. He is a principal driver of and contributor to the widely acclaimed "Patterns for e-business" and the Enterprise Solution Structure Reference Architectures, widely used by IGS in customer engagements. 42 | 43 | - Rod Smith, vice president of Internet emerging technologies, IBM Software Group. A leader in the areas of object-oriented programming, visual development tools, Java, XML, and Web Services. Rod also was the chief technical strategist for focusing the Java platform for use in middleware solutions, in particular initiating contributions to the development of the J2EE. 44 | 45 | - Charles Webb, eServer processor design, IBM Systems Group. Charles Webb has led the reinvention of IBM's eServer zSeries microprocessor designs and roadmap, including the z900 server, where he provided the bridge among architecture, hardware, compilers and system software, defining major portions of the 64- bit architecture and beyond. 
46 | 47 | 48 | The title of IBM Fellow is the company's most preeminent technical distinction and is granted in recognition of outstanding and sustained technical achievements in engineering, programming, science and technology. Only 175 individuals have earned this designation in the company's history and, including the newly named Fellows, 56 are active employees. IBM Fellows are encouraged to further enhance their potential for creative achievements and typically work on special projects or research initiatives that lead the company in exciting new directions. 49 | 50 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/examples/xmi/XmiCollectionReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */

package org.apache.uima.examples.xmi;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.impl.XmiCasDeserializer;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.resource.ResourceConfigurationException;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.xml.sax.SAXException;

/**
 * A simple collection reader that reads CASes in XMI format from a directory in the filesystem.
 */
public class XmiCollectionReader extends CollectionReader_ImplBase {
  /**
   * Name of configuration parameter that must be set to the path of a directory containing the XMI
   * files.
   */
  public static final String PARAM_INPUTDIR = "InputDirectory";

  /**
   * Name of the optional configuration parameter that indicates whether execution fails when an
   * encountered type is unknown. Treated as true when the parameter is not set.
   */
  public static final String PARAM_FAILUNKNOWN = "FailOnUnknownType";

  private Boolean mFailOnUnknownType;

  /** The .xmi files found in the input directory, in the order the filesystem listed them. */
  private ArrayList<File> mFiles;

  /** Index into {@link #mFiles} of the next file to read. */
  private int mCurrentIndex;

  /**
   * Reads the configuration parameters and collects the .xmi files of the input directory
   * (sub-directories are not searched).
   *
   * @throws ResourceInitializationException
   *           if the input directory does not exist, is not a directory, or cannot be listed
   * @see org.apache.uima.collection.CollectionReader_ImplBase#initialize()
   */
  public void initialize() throws ResourceInitializationException {
    mFailOnUnknownType = (Boolean) getConfigParameterValue(PARAM_FAILUNKNOWN);
    if (null == mFailOnUnknownType) {
      mFailOnUnknownType = true; // default to true if not specified
    }
    File directory = new File(((String) getConfigParameterValue(PARAM_INPUTDIR)).trim());
    mCurrentIndex = 0;

    // if input directory does not exist or is not a directory, throw exception
    if (!directory.exists() || !directory.isDirectory()) {
      throw new ResourceInitializationException(ResourceConfigurationException.DIRECTORY_NOT_FOUND,
              new Object[] { PARAM_INPUTDIR, this.getMetaData().getName(), directory.getPath() });
    }

    // File.listFiles() returns null when the directory cannot be read (I/O error, missing
    // permission); report that instead of failing with a NullPointerException below.
    File[] files = directory.listFiles();
    if (files == null) {
      throw new ResourceInitializationException(new IOException(
              "Unable to list the contents of directory " + directory.getPath()));
    }

    // get list of .xmi files in the specified directory
    mFiles = new ArrayList<File>();
    for (int i = 0; i < files.length; i++) {
      if (!files[i].isDirectory() && files[i].getName().endsWith(".xmi")) {
        mFiles.add(files[i]);
      }
    }
  }

  /**
   * @see org.apache.uima.collection.CollectionReader#hasNext()
   */
  public boolean hasNext() {
    return mCurrentIndex < mFiles.size();
  }

  /**
   * Deserializes the next .xmi file into the given CAS and advances to the following file.
   *
   * @see org.apache.uima.collection.CollectionReader#getNext(org.apache.uima.cas.CAS)
   */
  public void getNext(CAS aCAS) throws IOException, CollectionException {
    File currentFile = mFiles.get(mCurrentIndex++);
    FileInputStream inputStream = new FileInputStream(currentFile);
    try {
      // the flag passed on is the negation of "fail on unknown type"
      XmiCasDeserializer.deserialize(inputStream, aCAS, !mFailOnUnknownType);
    } catch (SAXException e) {
      throw new CollectionException(e);
    } finally {
      inputStream.close();
    }
  }

  /**
   * Nothing to release: every input stream is closed in {@link #getNext(CAS)}.
   *
   * @see org.apache.uima.collection.base_cpm.BaseCollectionReader#close()
   */
  public void close() throws IOException {
  }

  /**
   * Reports progress as the number of files consumed so far out of the number of files found.
   *
   * @see org.apache.uima.collection.base_cpm.BaseCollectionReader#getProgress()
   */
  public Progress[] getProgress() {
    return new Progress[] { new ProgressImpl(mCurrentIndex, mFiles.size(), Progress.ENTITIES) };
  }

}
122 | -------------------------------------------------------------------------------- /target/classes/types/TutorialTypeSystem.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | TutorialTypeSystem 26 | Type System Definition for the tutorial examples - as of Exercise 6 27 | 1.0 28 | The Apache Software Foundation 29 | 30 | 31 | org.apache.uima.tutorial.RoomNumber 32 | 33 | uima.tcas.Annotation 34 | 35 | 36 | building 37 | Building containing this room 38 | uima.cas.String 39 | 40 | 41 | 42 | 43 | org.apache.uima.tutorial.DateTimeAnnot 44 | 45 | uima.tcas.Annotation 46 | 47 | 48 | shortDateString 49 | 50 | uima.cas.String 51 | 52 | 53 | 54 | 55 | org.apache.uima.tutorial.TimeAnnot 56 | 57 | org.apache.uima.tutorial.DateTimeAnnot 58 | 59 | 60 | 61 | org.apache.uima.tutorial.DateAnnot 62 | 63 | org.apache.uima.tutorial.DateTimeAnnot 64 | 65 | 66 | 67 | org.apache.uima.tutorial.Meeting 68 | 69 | uima.tcas.Annotation 70 | 71 | 72 | room 73 | 74 | org.apache.uima.tutorial.RoomNumber 75 | 76 | 77 | date 78 | 79 | org.apache.uima.tutorial.DateAnnot 80 | 81 | 82 | startTime 83 | 84 | org.apache.uima.tutorial.TimeAnnot 85 | 86 | 87 | endTime 88 | 89 | org.apache.uima.tutorial.TimeAnnot 90 | 91 | 92 | 93 | 94 |
org.apache.uima.tutorial.UimaAcronym 95 | 96 | uima.tcas.Annotation 97 | 98 | 99 | expandedForm 100 | 101 | uima.cas.String 102 | 103 | 104 | 105 | 106 | org.apache.uima.tutorial.UimaMeeting 107 | 108 | org.apache.uima.tutorial.Meeting 109 | 110 | 111 | org.apache.uima.examples.tokenizer.Token 112 | 113 | uima.tcas.Annotation 114 | 115 | 116 | org.apache.uima.examples.tokenizer.Sentence 117 | 118 | uima.tcas.Annotation 119 | 120 | 121 | -------------------------------------------------------------------------------- /src/main/resources/types/TutorialTypeSystem.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | TutorialTypeSystem 26 | Type System Definition for the tutorial examples - as of Exercise 6 27 | 1.0 28 | The Apache Software Foundation 29 | 30 | 31 | org.apache.uima.tutorial.RoomNumber 32 | 33 | uima.tcas.Annotation 34 | 35 | 36 | building 37 | Building containing this room 38 | uima.cas.String 39 | 40 | 41 | 42 | 43 | org.apache.uima.tutorial.DateTimeAnnot 44 | 45 | uima.tcas.Annotation 46 | 47 | 48 | shortDateString 49 | 50 | uima.cas.String 51 | 52 | 53 | 54 | 55 | org.apache.uima.tutorial.TimeAnnot 56 | 57 | org.apache.uima.tutorial.DateTimeAnnot 58 | 59 | 60 | 61 | org.apache.uima.tutorial.DateAnnot 62 | 63 | org.apache.uima.tutorial.DateTimeAnnot 64 | 65 | 66 | 67 | org.apache.uima.tutorial.Meeting 68 | 69 | uima.tcas.Annotation 70 | 71 | 72 | room 73 | 74 | org.apache.uima.tutorial.RoomNumber 75 | 76 | 77 | date 78 | 79 | org.apache.uima.tutorial.DateAnnot 80 | 81 | 82 | startTime 83 | 84 | org.apache.uima.tutorial.TimeAnnot 85 | 86 | 87 | endTime 88 | 89 | org.apache.uima.tutorial.TimeAnnot 90 | 91 | 92 | 93 | 94 | org.apache.uima.tutorial.UimaAcronym 95 | 96 | uima.tcas.Annotation 97 | 98 | 99 | expandedForm 100 | 101 | uima.cas.String 102 | 103 | 104 | 105 | 106 | org.apache.uima.tutorial.UimaMeeting 107 | 108 | org.apache.uima.tutorial.Meeting 109 | 110 | 111 | 
org.apache.uima.examples.tokenizer.Token 112 | 113 | uima.tcas.Annotation 114 | 115 | 116 | org.apache.uima.examples.tokenizer.Sentence 117 | 118 | uima.tcas.Annotation 119 | 120 | 121 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/examples/casMultiplier/SimpleTextSegmenter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.uima.examples.casMultiplier; 21 | 22 | import org.apache.uima.UimaContext; 23 | import org.apache.uima.analysis_component.JCasMultiplier_ImplBase; 24 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 25 | import org.apache.uima.cas.AbstractCas; 26 | import org.apache.uima.cas.FSIterator; 27 | import org.apache.uima.examples.SourceDocumentInformation; 28 | import org.apache.uima.jcas.JCas; 29 | import org.apache.uima.resource.ResourceInitializationException; 30 | 31 | /** 32 | * An example CasMultiplier, which breaks large text documents into smaller segments. 
The minimum 33 | * size of the segments as determined by the "SegmentSize" configuration parameter, but the break 34 | * between segments will always occur at the next newline character, so segments will not be exactly 35 | * that size. 36 | */ 37 | public class SimpleTextSegmenter extends JCasMultiplier_ImplBase { 38 | private String mDoc; 39 | 40 | private int mPos; 41 | 42 | private int mSegmentSize; 43 | 44 | private String mDocUri; 45 | 46 | /* 47 | * (non-Javadoc) 48 | * 49 | * @see org.apache.uima.analysis_component.AnalysisComponent_ImplBase#initialize(org.apache.uima.UimaContext) 50 | */ 51 | public void initialize(UimaContext aContext) throws ResourceInitializationException { 52 | super.initialize(aContext); 53 | mSegmentSize = ((Integer) aContext.getConfigParameterValue("SegmentSize")).intValue(); 54 | } 55 | 56 | /* 57 | * (non-Javadoc) 58 | * 59 | * @see JCasMultiplier_ImplBase#process(JCas) 60 | */ 61 | public void process(JCas aJCas) throws AnalysisEngineProcessException { 62 | mDoc = aJCas.getDocumentText(); 63 | mPos = 0; 64 | // retreive the filename of the input file from the CAS so that it can be added 65 | // to each segment 66 | FSIterator it = aJCas.getAnnotationIndex(SourceDocumentInformation.type).iterator(); 67 | if (it.hasNext()) { 68 | SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next(); 69 | mDocUri = fileLoc.getUri(); 70 | } else { 71 | mDocUri = null; 72 | } 73 | } 74 | 75 | /* 76 | * (non-Javadoc) 77 | * 78 | * @see org.apache.uima.analysis_component.AnalysisComponent#hasNext() 79 | */ 80 | public boolean hasNext() throws AnalysisEngineProcessException { 81 | return mPos < mDoc.length(); 82 | } 83 | 84 | /* 85 | * (non-Javadoc) 86 | * 87 | * @see org.apache.uima.analysis_component.AnalysisComponent#next() 88 | */ 89 | public AbstractCas next() throws AnalysisEngineProcessException { 90 | int breakAt = mPos + mSegmentSize; 91 | if (breakAt > mDoc.length()) 92 | breakAt = mDoc.length(); 93 | // search for the next 
newline character. Note: this example segmenter implementation 94 | // assumes that the document contains many newlines. In the worst case, if this segmenter 95 | // is runon a document with no newlines, it will produce only one segment containing the 96 | // entire document text. A better implementation might specify a maximum segment size as 97 | // well as a minimum. 98 | while (breakAt < mDoc.length() && mDoc.charAt(breakAt - 1) != '\n') 99 | breakAt++; 100 | 101 | JCas jcas = getEmptyJCas(); 102 | try { 103 | jcas.setDocumentText(mDoc.substring(mPos, breakAt)); 104 | // if original CAS had SourceDocumentInformation, also add SourceDocumentInformatio 105 | // to each segment 106 | if (mDocUri != null) { 107 | SourceDocumentInformation sdi = new SourceDocumentInformation(jcas); 108 | sdi.setUri(mDocUri); 109 | sdi.setOffsetInSource(mPos); 110 | sdi.setDocumentSize(breakAt - mPos); 111 | sdi.addToIndexes(); 112 | 113 | if (breakAt == mDoc.length()) { 114 | sdi.setLastSegment(true); 115 | } 116 | } 117 | 118 | mPos = breakAt; 119 | return jcas; 120 | } catch (Exception e) { 121 | jcas.release(); 122 | throw new AnalysisEngineProcessException(e); 123 | } 124 | } 125 | 126 | } 127 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/examples/ExampleApplication.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.uima.examples; 21 | 22 | import java.io.BufferedInputStream; 23 | import java.io.File; 24 | import java.io.IOException; 25 | 26 | import org.apache.uima.UIMAFramework; 27 | import org.apache.uima.analysis_engine.AnalysisEngine; 28 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 29 | import org.apache.uima.cas.CAS; 30 | import org.apache.uima.resource.ResourceSpecifier; 31 | import org.apache.uima.util.FileUtils; 32 | import org.apache.uima.util.XMLInputSource; 33 | 34 | /** 35 | * An example application that reads documents from files, sends them though an Analysis Engine, and 36 | * prints all discovered annotations to System.out. 37 | *

38 | * The application takes two arguments:
39 | * <ol>
40 | * <li>The path to an XML descriptor for the Analysis Engine to be executed</li>
41 | * <li>An input directory containing files to be processed</li>
42 | * </ol>
43 | */ 44 | public class ExampleApplication { 45 | /** 46 | * Main program. 47 | * 48 | * @param args 49 | * Command-line arguments - see class description 50 | */ 51 | public static void main(String[] args) { 52 | try { 53 | File taeDescriptor = null; 54 | File inputDir = null; 55 | 56 | // Read and validate command line arguments 57 | boolean validArgs = false; 58 | if (args.length == 2) { 59 | taeDescriptor = new File(args[0]); 60 | inputDir = new File(args[1]); 61 | 62 | validArgs = taeDescriptor.exists() && !taeDescriptor.isDirectory() 63 | && inputDir.isDirectory(); 64 | } 65 | if (!validArgs) { 66 | printUsageMessage(); 67 | } else { 68 | // get Resource Specifier from XML file 69 | XMLInputSource in = new XMLInputSource(taeDescriptor); 70 | ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in); 71 | 72 | // for debugging, output the Resource Specifier 73 | // System.out.println(specifier); 74 | 75 | // create Analysis Engine 76 | AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier); 77 | // create a CAS 78 | CAS cas = ae.newCAS(); 79 | 80 | // get all files in the input directory 81 | File[] files = inputDir.listFiles(); 82 | if (files == null) { 83 | System.out.println("No files to process"); 84 | } else { 85 | // process documents 86 | for (int i = 0; i < files.length; i++) { 87 | if (!files[i].isDirectory()) { 88 | processFile(files[i], ae, cas); 89 | } 90 | } 91 | } 92 | ae.destroy(); 93 | } 94 | } catch (Exception e) { 95 | e.printStackTrace(); 96 | } 97 | } 98 | 99 | /** 100 | * Prints usage message. 
101 | */ 102 | private static void printUsageMessage() { 103 | System.err.println("Usage: java org.apache.uima.example.ExampleApplication " 104 | + " "); 105 | } 106 | 107 | /** 108 | * Processes a single XML file and prints annotations to System.out 109 | * 110 | * @param aFile 111 | * file to process 112 | * @param aAE 113 | * Analysis Engine that will process the file 114 | * @param aCAS 115 | * CAS that will be used to hold analysis results 116 | */ 117 | private static void processFile(File aFile, AnalysisEngine aAE, CAS aCAS) throws IOException, 118 | AnalysisEngineProcessException { 119 | System.out.println("Processing file " + aFile.getName()); 120 | 121 | String document = FileUtils.file2String(aFile); 122 | document = document.trim(); 123 | 124 | // put document text in CAS 125 | aCAS.setDocumentText(document); 126 | 127 | // process 128 | aAE.process(aCAS); 129 | 130 | // print annotations to System.out 131 | PrintAnnotations.printAnnotations(aCAS, System.out); 132 | 133 | // reset the CAS to prepare it for processing the next document 134 | aCAS.reset(); 135 | } 136 | 137 | } 138 | -------------------------------------------------------------------------------- /src/main/java/org/apache/uima/tutorial/ex6/UimaMeetingAnnotator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.uima.tutorial.ex6; 21 | 22 | import java.util.ArrayList; 23 | import java.util.Iterator; 24 | import java.util.List; 25 | import java.util.StringTokenizer; 26 | 27 | import org.apache.uima.UimaContext; 28 | import org.apache.uima.analysis_component.AnalysisComponent; 29 | import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; 30 | import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 31 | import org.apache.uima.cas.FSIndex; 32 | import org.apache.uima.cas.FSIterator; 33 | import org.apache.uima.jcas.JCas; 34 | import org.apache.uima.resource.ResourceAccessException; 35 | import org.apache.uima.resource.ResourceInitializationException; 36 | import org.apache.uima.tutorial.Meeting; 37 | import org.apache.uima.tutorial.UimaMeeting; 38 |
/**
 * Example annotator that walks the Meeting annotations of a document and marks a meeting as a
 * UimaMeeting when a UIMA term is found in the text surrounding it. Used together with the
 * UimaAcronymAnnotator in an aggregate TAE, it demonstrates how the ResourceManager lets
 * annotators share data.
 */
public class UimaMeetingAnnotator extends JCasAnnotator_ImplBase {
  /** Number of characters inspected on each side of a meeting. */
  private static final int WINDOW_MARGIN = 50;

  /** Delimiter characters used to split the inspected window into candidate terms. */
  private static final String TOKEN_DELIMITERS = " \t\n\r.<.>/?\";:[{]}\\|=+()!";

  /** Shared lookup table; a token is treated as a UIMA term when its lookup is non-null. */
  private StringMapResource mMap;

  /**
   * Fetches the shared String Map Resource registered under the key "UimaTermTable".
   *
   * @see AnalysisComponent#initialize(UimaContext)
   */
  @Override
  public void initialize(UimaContext aContext) throws ResourceInitializationException {
    super.initialize(aContext);
    try {
      mMap = (StringMapResource) getContext().getResourceObject("UimaTermTable");
    } catch (ResourceAccessException e) {
      throw new ResourceInitializationException(e);
    }
  }

  /**
   * Creates a UimaMeeting for every Meeting whose surrounding text contains a UIMA term.
   *
   * @see JCasAnnotator_ImplBase#process(JCas)
   */
  @Override
  public void process(JCas aJCas) throws AnalysisEngineProcessException {
    String docText = aJCas.getDocumentText();

    // New annotations are collected first and indexed only after the walk over the Meeting
    // index has finished, because adding to an index while iterating over it is not allowed.
    List<UimaMeeting> pending = new ArrayList<UimaMeeting>();

    FSIndex meetings = aJCas.getAnnotationIndex(Meeting.type);
    for (FSIterator cursor = meetings.iterator(); cursor.isValid(); cursor.moveToNext()) {
      Meeting meeting = (Meeting) cursor.get();

      // Window of text around the meeting, clamped to the document bounds
      // (the margin should probably be a configuration parameter).
      int windowStart = Math.max(0, meeting.getBegin() - WINDOW_MARGIN);
      int windowEnd = Math.min(docText.length(), meeting.getEnd() + WINDOW_MARGIN);

      if (containsUimaTerm(docText.substring(windowStart, windowEnd))) {
        pending.add(new UimaMeeting(aJCas, meeting.getBegin(), meeting.getEnd(),
                meeting.getRoom(), meeting.getDate(), meeting.getStartTime(),
                meeting.getEndTime()));
      }
    }

    for (UimaMeeting uimaMeeting : pending) {
      uimaMeeting.addToIndexes();
    }
  }

  /**
   * Reports whether any token of the given text is present in the term table.
   *
   * @param aWindow
   *          text to split into tokens and look up
   * @return true as soon as one token has a non-null entry in the map, false otherwise
   */
  private boolean containsUimaTerm(String aWindow) {
    StringTokenizer tokens = new StringTokenizer(aWindow, TOKEN_DELIMITERS);
    while (tokens.hasMoreTokens()) {
      if (mMap.get(tokens.nextToken()) != null) {
        return true;
      }
    }
    return false;
  }

}
123 | --------------------------------------------------------------------------------