├── .github ├── ISSUE_TEMPLATE │ ├── -report-a-vulnerability-.md │ ├── feature_request.md │ └── issue-report.md ├── build_operators.sh ├── build_operators_commits.txt ├── pull_request_template.md └── workflows │ ├── build_operators.yaml │ ├── build_push_container_image.yaml │ ├── integration-test.yml │ ├── pylint.yml │ ├── python-app.yml │ └── python-package-conda.yml ├── .gitignore ├── CHANGELOG.md ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── FAQ.md ├── LICENSE.md ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── VULNERABILITIES.md ├── component-library ├── analyze │ ├── spark-ts-trends.ipynb │ └── spark-ts-trends.yaml ├── anomaly │ ├── anomaly-score-unsupervised.ipynb │ └── anomaly-score-unsupervised │ │ ├── Dockerfile │ │ ├── build.sh │ │ ├── dapr │ │ ├── publish_event.sh │ │ ├── pubsub.yaml │ │ ├── start_sidecar.sh │ │ ├── start_subscriber.sh │ │ ├── statestore.yaml │ │ ├── subscriber.py │ │ └── subscription.yaml │ │ ├── test-anomaly-score-unsupervised.ipynb │ │ ├── watsoniotp.broken.phase_aligned.pickle │ │ └── watsoniotp.healthy.phase_aligned.pickle ├── checkpoint │ ├── pull_asset.ipynb │ ├── pull_asset.yaml │ ├── store_asset.ipynb │ └── store_asset.yaml ├── claimed_utils.py ├── deploy │ ├── README.md │ ├── condition-blessing.ipynb │ ├── condition-blessing.yaml │ ├── deploy-kfserving.ipynb │ ├── deploy_watson_machine_learning.ipynb │ ├── deploy_watson_machine_learning.yaml │ ├── deploy_wml_pmml.ipynb │ └── deploy_wml_pmml.yaml ├── examples │ ├── alert_for_content_in_url.cwl │ ├── alert_for_content_in_url.ipynb │ ├── alert_for_content_in_url.job.yaml │ ├── alert_for_content_in_url.yaml │ ├── fibonacci.cwl │ ├── fibonacci.job.yaml │ ├── fibonacci.py │ ├── fibonacci.yaml │ ├── hello_world.ipynb │ ├── hello_world.job.yaml │ └── hello_world.yaml ├── filter │ ├── README.md │ ├── filter.cwl │ ├── filter.ipynb │ ├── filter.job.yaml │ ├── filter.yaml │ ├── filter_docker.cwl │ └── spark-sample.ipynb ├── generic-notebook-runner.ipynb ├── geo │ └── gdal.ipynb ├── input │ ├── README.md │ ├── input-Xview-download.cwl │ ├── input-Xview-download.ipynb │ ├── input-Xview-download.job.yaml │ ├── input-Xview-download.yaml │ ├── input-climate-copernicus.ipynb │ ├── input-climate-copernicus.yaml │ ├── input-codenet-LangClass.ipynb │ ├── input-codenet-LangClass.yaml │ ├── input-cos-zarr.ipynb │ ├── input-covid-chestxray.ipynb │ ├── input-covid-chestxray.yaml │ ├── input-from-mongodb.ipynb │ ├── input-hmp.ipynb │ ├── input-hmp.yaml │ ├── input-mqtt.ipynb │ ├── input-pardata.ipynb │ ├── input-pardata.yaml │ ├── input-pei.ipynb │ ├── input-postgresql.ipynb │ ├── input-postgresql.yaml │ ├── input-rki-covid19-deaths.ipynb │ ├── input-swissmedic.ipynb │ ├── input-url.ipynb │ ├── input-url.yaml │ ├── input-webcam.ipynb │ └── ls-cos.ipynb ├── metric │ ├── README.md │ ├── metric-aif360.ipynb │ ├── metric-aif360.yaml │ ├── metric-aix360-lime.ipynb │ ├── metric-aix360-lime.yaml │ ├── metric-confusion-matrix.ipynb │ └── metric-confusion-matrix.yaml ├── monitoring │ ├── README.md │ ├── notification-email.ipynb │ └── notification-email.yaml ├── nlp │ ├── nlp-classify-text-simple.ipynb │ └── nlp-classify-text-simple.yaml ├── output │ ├── output-elastic.ipynb │ ├── output-rdbms-sqlalchemy.ipynb │ ├── output-rdbms-sqlalchemy.yaml │ ├── upload-to-cos-http-adapter.ipynb │ ├── upload-to-cos.cwl │ ├── upload-to-cos.ipynb │ ├── upload-to-cos.job.yaml │ └── upload-to-cos.yaml ├── predict │ ├── README.md │ ├── image-endpoint-tester.ipynb │ ├── image-endpoint.ipynb │ ├── predict-images.ipynb │ ├── 
predict-images.yaml │ ├── tvn2.ipynb │ └── yolo.ipynb ├── run_tests.py ├── segment-anything │ ├── generate-masks.ipynb │ ├── generate-masks.yaml │ ├── get-masks.ipynb │ └── get-masks.yaml ├── sim │ ├── wrf.ipynb │ └── wrf.yaml ├── train │ ├── README.md │ ├── nvflare.ipynb │ ├── spark-train-lr.ipynb │ ├── spark-train-lr.yaml │ ├── train-mobilenet_v2.ipynb │ └── train-mobilenet_v2.yaml ├── transform │ ├── README.md │ ├── alchemy-sql-query.ipynb │ ├── cloud-object-store-housekeeping.ipynb │ ├── cpdconfig.yaml │ ├── ibm-sql-query-cpd-manual.yaml │ ├── ibm-sql-query-cpd-test.ipynb │ ├── ibm-sql-query-cpd.ipynb │ ├── ibm-sql-query-cpd.yaml │ ├── ibm-sql-query-test.ipynb │ ├── ibm-sql-query.config │ ├── ibm-sql-query.dockerfile │ ├── ibm-sql-query.ipynb │ ├── ibm-sql-query.secrets.template │ ├── ibm-sql-query.yaml │ ├── image-tiling-with-metadata_adjustment.cwl │ ├── image-tiling-with-metadata_adjustment.cwl:Zone.Identifier │ ├── image-tiling-with-metadata_adjustment.ipynb │ ├── image-tiling-with-metadata_adjustment.ipynb:Zone.Identifier │ ├── image-tiling-with-metadata_adjustment.job.yaml │ ├── image-tiling-with-metadata_adjustment.job.yaml:Zone.Identifier │ ├── image-tiling-with-metadata_adjustment.yaml │ ├── image-tiling-with-metadata_adjustment.yaml:Zone.Identifier │ ├── spark-condense-parquet.ipynb │ ├── spark-condense-parquet.yaml │ ├── spark-csv-to-parquet.ipynb │ ├── spark-csv-to-parquet.yaml │ ├── spark-json-to-parquet.ipynb │ ├── spark-json-to-parquet.yaml │ ├── spark-parquet-to-csv.ipynb │ ├── spark-sql-interactive.ipynb │ ├── spark-sql-interactive │ │ ├── Dockerfile │ │ ├── app.py │ │ └── build.sh │ ├── spark-sql.ipynb │ ├── spark-sql.yaml │ ├── transform-apply.ipynb │ ├── transform-images.ipynb │ └── transform-images.yaml ├── util │ ├── cgw-util-cos-sync.ipynb │ ├── old_util-cos.ipynb │ ├── sparksql-interactive.ipynb │ ├── util-cos.cwl │ ├── util-cos.job.yaml │ ├── util-cos.py │ └── util-cos.yaml ├── visualize │ ├── map-from-coordinates.ipynb │ ├── timeseries-runchart.ipynb │ ├── timeseries-runchart.yaml │ ├── visualize-with-quickchart-mongodb.ipynb │ └── visualize-with-quickchart.ipynb └── voila-notebook-runner.ipynb ├── setup.py └── test_requirements.txt /.github/ISSUE_TEMPLATE/-report-a-vulnerability-.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: " Report a vulnerability " 3 | about: " Report a vulnerability " 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ### Summary 11 | _Short summary of the problem. Make the impact and severity as clear as possible. For example: An unsafe deserialization vulnerability allows any unauthenticated user to execute arbitrary code on the server._ 12 | 13 | ### Details 14 | _Give all details on the vulnerability. Pointing to the incriminated source code is very helpful for the maintainer._ 15 | 16 | ### PoC 17 | _Complete instructions, including specific configuration details, to reproduce the vulnerability._ 18 | 19 | ### Impact 20 | _What kind of vulnerability is it? Who is impacted?_ 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. 
I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | 22 | Please note that additional functionality must be covered by automated testing: https://github.com/claimed-framework/community/blob/main/contribution_process.md 23 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/issue-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Issue report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the issue** 11 | A clear and concise description of what the issue is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Screenshots or log output** 21 | If applicable, add screenshots or log output to help explain your problem. 22 |
<details><summary>Log Output</summary> 23 | <pre> 24 | Paste the log output here. 25 | </pre> 26 | </details>
27 | 28 | **Expected behavior** 29 | A clear and concise description of what you expected to happen. 30 | 31 | **Deployment information** 32 | Describe what you've deployed and how: 33 | - CLAIMED version: [e.g. 1.5.3] 34 | - Installation source: [e.g. git, dockerhub] 35 | - Runtime type: [e.g. ipython, docker, kubernetes, knative, kubeflow ] 36 | -------------------------------------------------------------------------------- /.github/build_operators.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script creates operators for all operator files in the last commit and pushes the images to a registry. 3 | # The KFP component yaml and Kubernetes job yaml files are added to git and pushed to branch main. 4 | # TODO: claimed-c3 v0.2.5 is using the default version 0.1 and cannot auto-increase the version. 5 | 6 | echo 'Running build_operators.sh' 7 | 8 | git checkout main 9 | # Get commit ids 10 | log_file=".github/build_operators_commits.txt" 11 | last_commit=$(sed -n '$p' $log_file) 12 | echo "Last commit: "$last_commit 13 | current_commit=$(git rev-parse --short main) 14 | echo "Current commit: "$current_commit 15 | # Get list of changed files from last build 16 | file_list=$(git diff --name-only $last_commit $current_commit) 17 | echo 'File list: '$file_list 18 | # Add current commit id to log 19 | echo "$current_commit" >> "$log_file" 20 | git add $log_file 21 | 22 | # Get default repository from env 23 | default_repository=${repository:-docker.io/romeokienzler} 24 | echo 'default repository: '$default_repository 25 | default_log_level=${log_level:-INFO} 26 | echo 'default log_level: '$default_log_level 27 | image_list='' 28 | 29 | for file in $file_list 30 | do 31 | # Check if the file is in the directory operators and ends with .py or .ipynb 32 | if [[ $file =~ ^operators/.*\.(py|ipynb)$ ]]; then 33 | echo "Processing file "$file 34 | 35 | if ! [ -f $file ]; then 36 | # File not found in main 37 | echo "File not found." 38 | continue 39 | fi 40 | 41 | dir=$(dirname "$file") 42 | bname="$(basename ${file})" 43 | 44 | # Reset variables 45 | gridwrapper=false 46 | cos=false 47 | process=false 48 | repository=False 49 | version=false 50 | additional_files=false 51 | log_level=false 52 | dockerfile_template_path=false 53 | image='' 54 | 55 | # Reading settings from optional cfg file 56 | config_file=${file%.*}.cfg 57 | if [ -f $config_file ]; then 58 | while read LINE; do declare "$LINE"; done < $config_file 59 | else 60 | # Missing cfg file 61 | echo "Config file not found, skipping file. Please add .cfg for to create the operator." 
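# --- Illustrative note, not part of the original script ---
# The optional .cfg file read above via `declare` must contain plain NAME=value
# lines (no spaces around '='). A purely hypothetical example for
# operators/mydir/my_operator.ipynb would be operators/mydir/my_operator.cfg containing:
#   repository=docker.io/my-org
#   version=0.2
#   log_level=DEBUG
#   gridwrapper=false
#   cos=false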
62 | continue 63 | fi 64 | 65 | # Get c3 command 66 | if [[ -n $gridwrapper && $gridwrapper != 'false' ]]; then 67 | # Create grid wrapper 68 | command='c3_create_gridwrapper '$file 69 | 70 | # Add process name for grid wrapper 71 | if [[ -n $process && $process != 'false' ]]; then 72 | command=' -p '$process 73 | else 74 | command=' -p grid_process' 75 | fi 76 | 77 | if [[ -n $cos && $cos != 'false' ]]; then 78 | # Use cos grid wrapper 79 | command+=' --cos' 80 | # Add cos grid wrapper files to git 81 | git_files=${dir}/cgw_${bname%.*}.py 82 | git_files+=' '${dir}/cgw_${bname%.*}.yaml 83 | git_files+=' '${dir}/cgw_${bname%.*}.job.yaml 84 | else 85 | # Add grid wrapper files to git 86 | git_files=${dir}/gw_${bname%.*}.py 87 | git_files+=' '${dir}/gw_${bname%.*}.yaml 88 | git_files+=' '${dir}/gw_${bname%.*}.job.yaml 89 | fi 90 | else 91 | # Create normal operator 92 | command='c3_create_operator '$file 93 | # Add KFP component yaml and Kubernetes job yaml to git 94 | git_files=${file%.*}.yaml 95 | git_files+=' '${file%.*}.job.yaml 96 | fi 97 | 98 | # Get repository 99 | if [[ -n $repository && $repository != 'false' ]]; then 100 | command+=' -r '$repository 101 | else 102 | # Use default repository 103 | command+=' -r '$default_repository 104 | fi 105 | 106 | # Optionally add version 107 | if [[ -n $version && $version != 'false' ]]; then 108 | command+=' -v '$version 109 | fi 110 | 111 | # Optionally add additional files 112 | if [[ -n $additional_files && $additional_files != 'false' ]]; then 113 | command+=' '$additional_files 114 | fi 115 | 116 | # Add log_level 117 | if [[ -n $log_level && $log_level != 'false' ]]; then 118 | command+=' -l '$log_level 119 | else 120 | command+=' -l '$default_log_level 121 | fi 122 | 123 | # Optionally add dockerfile_template_path 124 | if [[ -n $dockerfile_template_path && $dockerfile_template_path != 'false' ]]; then 125 | command+=' --dockerfile_template_path '$dockerfile_template_path 126 | fi 127 | 128 | # Execute command 129 | echo 'Run c3 with: '$command 130 | $command 131 | 132 | # Check error code from command 133 | if [ $? -eq 0 ]; then 134 | echo "Operator created." 135 | # Add new files to git 136 | for git_file in $git_files 137 | do 138 | git add $git_file 139 | done 140 | 141 | # Get image name from yaml file 142 | while read line; 143 | do 144 | # strip line 145 | line=${line// /} 146 | # check of image substring and replace first : with = 147 | if [[ $line = image:* ]]; then declare "${line/:/=}"; fi 148 | done < ${git_files##* } 149 | # add image to image_list 150 | image_list+=' '$image 151 | 152 | else 153 | echo "Command failed with exit status $?" 
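# --- Illustrative note, not part of the original script ---
# Example of the image extraction and tagging behaviour (hypothetical image name):
# if a generated job yaml contains the line
#   image: docker.io/claimed/claimed-spark-ts-trends:0.3
# the read loop above sets image=docker.io/claimed/claimed-spark-ts-trends:0.3,
# and the tagging step at the end of this script pushes the git tag
# docker.io/claimed/claimed-spark-ts-trends=0.3 (the first ':' replaced by '=').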
154 | fi 155 | fi 156 | done 157 | 158 | # Push files to main if an operator was created 159 | git pull 160 | git commit -m "operators build [skip ci]" 161 | git push origin HEAD:main 162 | 163 | # Adding tags for each generated image 164 | for image in $image_list 165 | do 166 | echo "Add tag ${image/:/=}" 167 | git tag -f ${image/:/=} -m $image; 168 | done 169 | git push --tags 170 | -------------------------------------------------------------------------------- /.github/build_operators_commits.txt: -------------------------------------------------------------------------------- 1 | 850f7c49 2 | fefa826 3 | d66bcef 4 | 0a965ba 5 | ba6bebd 6 | 378ad3b 7 | 926ba78 8 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ### What changes were proposed in this pull request? 2 | 8 | 9 | ### How was this pull request tested? 10 | 19 | 20 | 21 | Developer's Certificate of Origin 1.1 22 | 23 | By making a contribution to this project, I certify that: 24 | 25 | (a) The contribution was created in whole or in part by me and I 26 | have the right to submit it under the Apache License 2.0; or 27 | 28 | (b) The contribution is based upon previous work that, to the best 29 | of my knowledge, is covered under an appropriate open source 30 | license and I have the right under that license to submit that 31 | work with modifications, whether created in whole or in part 32 | by me, under the same open source license (unless I am 33 | permitted to submit under a different license), as indicated 34 | in the file; or 35 | 36 | (c) The contribution was provided directly to me by some other 37 | person who certified (a), (b) or (c) and I have not modified 38 | it. 39 | 40 | (d) I understand and agree that this project and the contribution 41 | are public and that a record of the contribution (including all 42 | personal information I submit with it, including my sign-off) is 43 | maintained indefinitely and may be redistributed consistent with 44 | this project or the open source license(s) involved. 
45 | -------------------------------------------------------------------------------- /.github/workflows/build_operators.yaml: -------------------------------------------------------------------------------- 1 | name: Build and push CLAIMED operators 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | 7 | env: 8 | repository: docker.io/claimed 9 | log_level: INFO 10 | 11 | jobs: 12 | 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | with: 20 | fetch-depth: 0 21 | - name: Login to Docker Hub 22 | uses: docker/login-action@v1 23 | with: 24 | username: ${{ secrets.CR_USER }} 25 | password: ${{ secrets.CR_PASSWORD }} 26 | - name: Set up Python 3.10 27 | uses: actions/setup-python@v3 28 | with: 29 | python-version: "3.10" 30 | - name: Install dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install claimed-c3 34 | - name: Build operators with C3 35 | run: | 36 | git config --global user.name 'claimed-framework' 37 | git config --global user.email 'claimed-framework@proton.me' 38 | bash .github/build_operators.sh 39 | -------------------------------------------------------------------------------- /.github/workflows/build_push_container_image.yaml: -------------------------------------------------------------------------------- 1 | name: Build and Push Container Images 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | env: 10 | version: 0.8 11 | repository: claimed 12 | create_image: True 13 | 14 | jobs: 15 | 16 | build: 17 | 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | - name: Login to Docker Hub 23 | uses: docker/login-action@v1 24 | with: 25 | username: ${{ secrets.CR_USER }} 26 | password: ${{ secrets.CR_PASSWORD }} 27 | 28 | - name: Build the Docker image 29 | run: | 30 | cd ./component-library/transform/ 31 | ./spark-sql-interactive/build.sh 32 | cd .. 33 | cd .. 
34 | cd ./component-library/anomaly/ 35 | ./anomaly-score-unsupervised/build.sh 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /.github/workflows/integration-test.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | name: Integration Tests 5 | 6 | on: 7 | push: 8 | branches: [ "master" ] 9 | pull_request: 10 | branches: [ "master" ] 11 | 12 | permissions: 13 | contents: read 14 | 15 | jobs: 16 | build: 17 | 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | - name: Set up Python 3.10 23 | uses: actions/setup-python@v3 24 | with: 25 | python-version: "3.10" 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install -r test_requirements.txt 30 | - name: Full integration test 31 | run: | 32 | cd component-library 33 | python ./run_tests.py 34 | -------------------------------------------------------------------------------- /.github/workflows/pylint.yml: -------------------------------------------------------------------------------- 1 | name: Static Code Analysis (Pylint) 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | python-version: ["3.8", "3.9", "3.10"] 15 | steps: 16 | - uses: actions/checkout@v3 17 | - name: Set up Python ${{ matrix.python-version }} 18 | uses: actions/setup-python@v3 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install pylint 25 | - name: Analysing the code with pylint 26 | run: | 27 | pylint $(git ls-files '*.py') 28 | pylint $(git ls-files '*.ipynb') 29 | -------------------------------------------------------------------------------- /.github/workflows/python-app.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | name: Dynamic Code Analysis 5 | 6 | on: 7 | push: 8 | branches: [ "main" ] 9 | pull_request: 10 | branches: [ "main" ] 11 | 12 | permissions: 13 | contents: read 14 | 15 | jobs: 16 | build: 17 | 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | - name: Set up Python 3.10 23 | uses: actions/setup-python@v3 24 | with: 25 | python-version: "3.10" 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install crosshair-tool 30 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 31 | - name: Test with pytest 32 | run: | 33 | crosshair watch ./component-library 34 | -------------------------------------------------------------------------------- /.github/workflows/python-package-conda.yml: -------------------------------------------------------------------------------- 1 | name: Python Package using Conda 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build-linux: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | max-parallel: 5 10 | 11 | steps: 12 | - uses: actions/checkout@v4 13 
| - name: Set up Python 3.10 14 | uses: actions/setup-python@v3 15 | with: 16 | python-version: '3.10' 17 | - name: Add conda to system path 18 | run: | 19 | # $CONDA is an environment variable pointing to the root of the miniconda directory 20 | echo $CONDA/bin >> $GITHUB_PATH 21 | - name: Install dependencies 22 | run: | 23 | conda env update --file environment.yml --name base 24 | - name: Lint with flake8 25 | run: | 26 | conda install flake8 27 | # stop the build if there are Python syntax errors or undefined names 28 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 29 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 30 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 31 | - name: Test with pytest 32 | run: | 33 | conda install pytest 34 | pytest 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.secrets 3 | .ipynb_checkpoints 4 | data/* 5 | *.log 6 | *.swp 7 | HMP_Dataset 8 | 9 | **/.ipynb_checkpoints/* 10 | **/.virtual_documents/* 11 | assets/.METADATA/job_run.* 12 | assets/job_run 13 | __pypackages__/ 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | *.so 18 | share/python-wheels/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | *.manifest 23 | *.spec 24 | pip-log.txt 25 | pip-delete-this-directory.txt 26 | htmlcov/ 27 | .tox/ 28 | .nox/ 29 | .coverage 30 | .coverage.* 31 | .cache 32 | nosetests.xml 33 | coverage.xml 34 | *.cover 35 | *.py,cover 36 | .hypothesis/ 37 | .pytest_cache/ 38 | cover/ 39 | *.mo 40 | *.pot 41 | local_settings.py 42 | db.sqlite3 43 | db.sqlite3-journal 44 | instance/ 45 | .webassets-cache 46 | .scrapy 47 | docs/_build/ 48 | .pybuilder/ 49 | profile_default/ 50 | ipython_config.py 51 | celerybeat-schedule 52 | celerybeat.pid 53 | .spyderproject 54 | .spyproject 55 | .ropeproject 56 | .mypy_cache/ 57 | .pyre/ 58 | .pytype/ 59 | cython_debug/ 60 | venv/ 61 | .venv/ 62 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | - See our changelog in the release section [here](https://github.com/claimed-framework/component-library/releases) 4 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | @romeokienzler 2 | @fatkaratekid 3 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## CLAIMED Community Code of Conduct 1.0 2 | 3 | 4 | ### Our Pledge 5 | 6 | As contributors and maintainers of this project, and in the interest of fostering 7 | an open and welcoming community, we pledge to respect all people who contribute 8 | through reporting issues, posting feature requests, updating documentation, 9 | submitting pull requests or patches, and other activities. 10 | 11 | We are committed to making participation in this project a harassment-free experience for 12 | everyone, regardless of level of experience, gender, gender identity and expression, 13 | sexual orientation, disability, personal appearance, body size, race, ethnicity, age, 14 | religion, or nationality. 
15 | 16 | We pledge to act and interact in ways that contribute to an open, welcoming, diverse, 17 | inclusive, and healthy community. 18 | 19 | ### Our Standards 20 | 21 | Examples of behavior that contributes to a positive environment for our community include: 22 | 23 | * Demonstrating empathy and kindness toward other people 24 | * Being respectful of differing opinions, viewpoints, and experiences 25 | * Giving and gracefully accepting constructive feedback 26 | * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience 27 | * Focusing on what is best not just for us as individuals, but for the overall community 28 | 29 | Examples of unacceptable behavior include: 30 | 31 | * The use of sexualized language or imagery, and sexual attention or 32 | advances of any kind 33 | * Trolling, insulting or derogatory comments, and personal or political attacks 34 | * Public or private harassment 35 | * Publishing others' private information, such as a physical or email 36 | address, without their explicit permission 37 | * Other conduct which could reasonably be considered inappropriate in a 38 | professional setting 39 | 40 | ## Enforcement Responsibilities 41 | 42 | Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior 43 | and will take appropriate and fair corrective action in response to any behavior that they deem 44 | inappropriate, threatening, offensive, or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject comments, commits, 47 | code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, 48 | and will communicate reasons for moderation decisions when appropriate. 49 | 50 | ## Scope 51 | 52 | This Code of Conduct applies within all community spaces including Matrix, issue trackers, wikis, 53 | blogs, Twitter, and any other communication channels used by our community, and also applies when 54 | an individual is officially representing the community in public spaces. Examples of representing 55 | our community include using an official e-mail address, posting via an official social media account, 56 | or acting as an appointed representative at an online or offline event. 57 | 58 | ## Enforcement 59 | 60 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community 61 | leaders responsible for enforcement via Matrix channel to the CLAIMED Project Management Committee at 62 | #claimed-pmc:matrix.org. All complaints will be reviewed and investigated promptly and fairly. 63 | 64 | All community leaders are obligated to respect the privacy and security of the reporter of any incident. 65 | 66 | 67 | ## Attribution 68 | 69 | This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), 70 | version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 71 | 72 | 73 | 74 | In addition, all participants agree to abide by the Code of Conduct available at https://lfprojects.org/policies/code-of-conduct/. 75 | Please contact conduct@lfaidata.foundation to report any violations or concerns. 76 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | # Contributing 20 | 21 | Welcome to CLAIMED! 
If you are interested in contributing to the [CLAIMED code repo](README.md) 22 | then check out the [Contribution Process](https://github.com/claimed-framework/community/blob/main/contribution_process.md) and 23 | the [Code of Conduct](https://github.com/claimed-framework/component-library/blob/master/CODE_OF_CONDUCT.md). 24 | 25 | The [CLAIMED community repo](https://github.com/claimed-framework/community) contains information on how the community 26 | is organized and other information that is pertinent to contributing. 27 | 28 | ### Getting Started 29 | 30 | You are encouraged to look under the [Issues](https://github.com/claimed-framework/component-library/issues) tab for contribution opportunities. 31 | -------------------------------------------------------------------------------- /FAQ.md: -------------------------------------------------------------------------------- 1 | # What is the CLAIMED Library? 2 | 3 | CLAIMED is a library of re-usable coarse-grained data processing 4 | components to create Data & AI pipelines without programming skills. 5 | 6 | # How does the CLAIMED Library help developers and data scientists? 7 | 8 | Lead Data Scientists and Domain Experts contribute to the library to 9 | create opinionated, tested and re-usable components which are consumed 10 | by citizen data scientists and developers, enabling them to create 11 | state-of-the-art Data & AI workflows. 12 | 13 | # Why did IBM decide to contribute this open source project to LFAI? 14 | 15 | An open source project is only as strong as its community. IBM wants to 16 | grow the community around CLAIMED and Elyra since both projects are 17 | very strategic open source projects for IBM and Red Hat. 18 | 19 | # When did IBM open source it? 20 | 21 | The initial repository was created in 2015 and was originally used to 22 | support the online courses IBM provides on Coursera.org and EDX.org. 23 | Based on the very positive feedback IBM received from the learners, we 24 | decided to create a general purpose library for AI, Machine Learning, 25 | ETL and Data Science. 26 | 27 | # Is there any competing project at IBM, or outside of IBM? 28 | 29 | No. Open source and open standards are the key principles of CLAIMED. 30 | CLAIMED can therefore be used in various contexts and doesn't 31 | compete but integrates. 32 | 33 | # What action do we want the open source community to take? 34 | 35 | We are actively looking for developers and data scientists to use the 36 | library for their daily work, including production-ready software. We also 37 | want them to report issues, fix issues via pull requests, participate in 38 | our discussions and contribute new components to the library. 39 | 40 | 41 | # How do we want you to use CLAIMED? 42 | 43 | CLAIMED can be consumed in many ways and we encourage and support all 44 | scenarios. As each CLAIMED component is backed by a Jupyter notebook or 45 | an (R|Python|Bash) script and defines a clear interface, components can be 46 | invoked directly from source code or from a command line (see the sketch below). This way a 47 | Data & AI pipeline can be built by arbitrary code or shell scripts. The 48 | next level is using Docker. As each CLAIMED component is automatically 49 | compiled into a container image, a set of "docker run" commands will do 50 | the job.
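As a concrete illustration of the direct-invocation path just described, here is a minimal, hedged sketch; the script path and parameter values are hypothetical examples (the `.py` script is assumed to have been generated from the corresponding notebook), and the parameter names are taken from the spark-ts-trends component further below.

```python
# Minimal sketch only (hypothetical path and values): run a CLAIMED component
# directly as a subprocess, passing its parameters as environment variables.
import os
import subprocess

env = dict(os.environ)
env["data_parquet"] = "data.parquet"   # input parquet file
env["master"] = "local[*]"             # Spark master URL
env["data_dir"] = "../../data/"        # temporary data directory

subprocess.run(
    ["python", "component-library/analyze/spark-ts-trends.py"],
    env=env,
    check=True,
)
```

The container route works the same way: the published image can be started with `docker run`, passing the same variables via `-e` flags.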
Finally, CLAIMED also creates Kubeflow Pipeline Component 51 | specifications automatically, therefore, CLAIMED can be used in any 52 | Kubeflow Pipeline setting, where the Gold standard is using ML Exchange 53 | as component repository and Elyra as graphical pipeline editor. 54 | 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![OpenSSF Best Practices](https://bestpractices.coreinfrastructure.org/projects/6718/badge)](https://bestpractices.coreinfrastructure.org/projects/6718) 2 | [![GitHub](https://img.shields.io/badge/issue_tracking-github-blue.svg)](https://github.com/claimed-framework/component-library/issues) 3 | 4 | 5 | 6 | # CLAIMED - It's time to concentrate on your code only 7 | 8 | For more information, please visit the project's [website](https://claimed-framework.github.io/) 9 | 10 | ## Credits 11 | 12 | CLAIMED is supported by the EU’s Horizon Europe program under Grant Agreement number 101131841 and also received funding from the Swiss State Secretariat for Education, Research and Innovation (SERI) and the UK Research and Innovation (UKRI). 13 | 14 | 15 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | Please see the following [file](https://github.com/claimed-framework/component-library/blob/master/VULNERABILITIES.md) for SECURITY related content 2 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # Support 5 | 6 | For support, please contact us on the [CLAIMED workspace](https://matrix.to/#/!HxmUqvBKioTlNGyZGJ:matrix.org?via=matrix.org) on Matrix.org, please use the [Issue Tracker](https://github.com/claimed-framework/component-library/issues) 7 | if you feel that you've found a defect or want to propose a new feature. 8 | -------------------------------------------------------------------------------- /VULNERABILITIES.md: -------------------------------------------------------------------------------- 1 | # VULNERABILITIES reporting process 2 | 3 | Vulnerabilities can be reported on the GitHub Issue Tracker but reporting them on a secure and private channel is preferred. 4 | 5 | The current way of reporting private vulnerabilities is using https://encrypt.to/romeokienzler. 
6 | 7 | We are working on a more generic channel and will update this information accordingly. 8 | -------------------------------------------------------------------------------- /component-library/analyze/spark-ts-trends.yaml: -------------------------------------------------------------------------------- 1 | name: spark-ts-trends 2 | description: Computes trends of time series by fitting a low-order polynomial 3 | 4 | 5 | inputs: 6 | - {name: data_parquet, type: String, description: 'data_parquet path and parquet file name (default: data.parquet)'} 7 | - {name: master, type: String, description: 'master url of master (default: local mode)'} 8 | - {name: data_dir, type: String, description: 'data_dir temporary data storage for local execution'} 9 | 10 | 11 | outputs: 12 | - {name: output_result_filename, type: String, description: 'output_result_filename parquet file name of result (default: trends.parquet)'} 13 | 14 | 15 | implementation: 16 | container: 17 | image: romeokienzler/claimed-spark-ts-trends:0.3 18 | command: 19 | - sh 20 | - -ec 21 | - | 22 | python ./spark-ts-trends.py output_result_filename="$0" data_parquet="$1" master="$2" data_dir="$3" 23 | - {outputPath: output_result_filename} 24 | - {inputValue: data_parquet} 25 | - {inputValue: master} 26 | - {inputValue: data_dir} 27 | -------------------------------------------------------------------------------- /component-library/anomaly/anomaly-score-unsupervised/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM registry.access.redhat.com/ubi8/python-39 2 | RUN pip install ipython nbformat tensorflow==2.9.1 numpy==1.23.2 scikit-learn==1.1.2 pandas==1.4.3 flask==2.2.2 matplotlib==3.5.3 3 | ADD anomaly-score-unsupervised.py . 4 | ENTRYPOINT ["python", "anomaly-score-unsupervised.py"] -------------------------------------------------------------------------------- /component-library/anomaly/anomaly-score-unsupervised/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pip install jupyter nbconvert 3 | jupyter nbconvert --to script anomaly-score-unsupervised.ipynb 4 | docker build -t claimed-anomaly-score-unsupervised:`echo $version` -f anomaly-score-unsupervised/Dockerfile . 
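# --- Illustrative note, not part of the original script ---
# This script expects the environment variables $version and $repository to be
# set before running, for example (hypothetical values):
#   export version=0.1
#   export repository=docker.io/my-org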
5 | docker tag claimed-anomaly-score-unsupervised:`echo $version` `echo $repository`/claimed-anomaly-score-unsupervised:`echo $version` 6 | docker push `echo $repository`/claimed-anomaly-score-unsupervised:`echo $version` 7 | -------------------------------------------------------------------------------- /component-library/anomaly/anomaly-score-unsupervised/dapr/publish_event.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | dapr publish --publish-app-id anomalydetection --pubsub pubsub --topic anomaly-data --data '{"orderId": "100"}' -------------------------------------------------------------------------------- /component-library/anomaly/anomaly-score-unsupervised/dapr/pubsub.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dapr.io/v1alpha1 2 | kind: Component 3 | metadata: 4 | name: pubsub 5 | spec: 6 | type: pubsub.redis 7 | version: v1 8 | metadata: 9 | - name: redisHost 10 | value: localhost:6379 11 | - name: redisPassword 12 | value: "" 13 | -------------------------------------------------------------------------------- /component-library/anomaly/anomaly-score-unsupervised/dapr/start_sidecar.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | dapr run --app-id anomalydetection --dapr-http-port 3601 -------------------------------------------------------------------------------- /component-library/anomaly/anomaly-score-unsupervised/dapr/start_subscriber.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | dapr run --app-id anomalydetection --app-port 6002 --dapr-http-port 3602 --app-protocol grpc -- python3 subscriber.py -------------------------------------------------------------------------------- /component-library/anomaly/anomaly-score-unsupervised/dapr/statestore.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dapr.io/v1alpha1 2 | kind: Component 3 | metadata: 4 | name: statestore 5 | spec: 6 | type: state.redis 7 | version: v1 8 | metadata: 9 | - name: redisHost 10 | value: localhost:6379 11 | - name: redisPassword 12 | value: "" 13 | - name: actorStateStore 14 | value: "true" 15 | -------------------------------------------------------------------------------- /component-library/anomaly/anomaly-score-unsupervised/dapr/subscriber.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # In[1]: 5 | 6 | 7 | #get_ipython().system('pip install dapr==1.7.0 cloudevents==1.6.1 dapr-ext-grpc==1.7.0') 8 | 9 | 10 | # In[2]: 11 | 12 | 13 | #import sys 14 | #s#ys.path.append('/home/romeokienzler/venvs/claimed/lib/python3.10/site-packages/') 15 | #sys.path 16 | 17 | 18 | # In[ ]: 19 | 20 | 21 | #dependencies 22 | from cloudevents.sdk.event import v1 23 | from dapr.ext.grpc import App 24 | import logging 25 | import json 26 | 27 | #code 28 | app = App() 29 | logging.basicConfig(level = logging.INFO) 30 | #Subscribe to a topic 31 | @app.subscribe(pubsub_name='pubsub', topic='anomaly-data') 32 | def mytopic(event: v1.Event) -> None: 33 | data = json.loads(event.Data()) 34 | logging.info('Subscriber received: ' + str(data)) 35 | 36 | app.run(6002) 37 | 38 | 39 | # In[ ]: 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- 
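To complement the subscriber above, the following is a minimal, hedged publisher sketch using the Dapr Python SDK; it assumes the pubsub component and topic names configured in pubsub.yaml and subscription.yaml, reuses the example payload from publish_event.sh, and requires a running Dapr sidecar.

```python
# Minimal publisher sketch (illustrative only) for the 'anomaly-data' topic that
# subscriber.py listens on. Requires the dapr Python SDK and a running sidecar.
import json

from dapr.clients import DaprClient

with DaprClient() as client:
    client.publish_event(
        pubsub_name='pubsub',                   # matches dapr/pubsub.yaml
        topic_name='anomaly-data',              # matches dapr/subscription.yaml
        data=json.dumps({'orderId': '100'}),    # same example payload as publish_event.sh
        data_content_type='application/json',
    )
```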
/component-library/anomaly/anomaly-score-unsupervised/dapr/subscription.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dapr.io/v1alpha1 2 | kind: Subscription 3 | metadata: 4 | name: anomaly-data-sub 5 | spec: 6 | topic: anomaly-data 7 | route: /send_data 8 | pubsubname: pubsub 9 | scopes: 10 | - anomalydetection -------------------------------------------------------------------------------- /component-library/checkpoint/pull_asset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "7e06922b-a531-4ac7-b254-6ae825c8da67", 6 | "metadata": {}, 7 | "source": [ 8 | "# pull_asset" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "19ddc5ea-34b5-4b31-a254-2ba3c44dc62c", 14 | "metadata": {}, 15 | "source": [ 16 | "Pulls an asset (file) from S3 compliant COS (Cloud Object Store) to provide it to subsequent stages to avoid re-processing of data.\n", 17 | "\n", 18 | "WARNING: currently only minio supported" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "5651e212-f812-46ec-b674-77680756856d", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "!pip install wget==3.2" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "id": "stainless-purpose", 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "import wget\n", 39 | "wget.download(\n", 40 | " 'https://raw.githubusercontent.com/IBM/claimed/master/component-library/claimed_utils.py'\n", 41 | ")\n", 42 | "from claimed_utils import parse_args_to_parameters\n", 43 | "from minio import Minio\n", 44 | "import os\n", 45 | "import os.path" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "id": "tired-access", 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "# checkpoint_ip minio endpoint\n", 56 | "checkpoint_ip = os.environ.get('checkpoint_ip')\n", 57 | "\n", 58 | "# checkpoint_user minio user\n", 59 | "checkpoint_user = os.environ.get('checkpoint_user', 'minio')\n", 60 | "\n", 61 | "# checkpoint_pass minio pw\n", 62 | "checkpoint_pass = os.environ.get('checkpoint_pass', 'minio123')\n", 63 | "\n", 64 | "# checkpoint_bucket minio bucket\n", 65 | "checkpoint_bucket = os.environ.get('checkpoint_bucket', 'checkpoint')\n", 66 | "\n", 67 | "# name of asset to pull\n", 68 | "asset_name = os.environ.get('asset_name')\n", 69 | "\n", 70 | "parse_args_to_parameters()" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "id": "bound-tuning", 77 | "metadata": { 78 | "tags": [] 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "client = Minio(checkpoint_ip, checkpoint_user, checkpoint_pass, secure=False)\n", 83 | "\n", 84 | "objects = client.list_objects(checkpoint_bucket)\n", 85 | "for obj in objects:\n", 86 | " if asset_name == obj.object_name:\n", 87 | " client.fget_object(checkpoint_bucket, asset_name, asset_name)\n", 88 | " break" 89 | ] 90 | } 91 | ], 92 | "metadata": { 93 | "kernelspec": { 94 | "display_name": "Python 3 (ipykernel)", 95 | "language": "python", 96 | "name": "python3" 97 | }, 98 | "language_info": { 99 | "codemirror_mode": { 100 | "name": "ipython", 101 | "version": 3 102 | }, 103 | "file_extension": ".py", 104 | "mimetype": "text/x-python", 105 | "name": "python", 106 | "nbconvert_exporter": "python", 107 | "pygments_lexer": "ipython3", 108 | "version": "3.8.10" 109 | } 110 | }, 111 | "nbformat": 4, 112 | 
"nbformat_minor": 5 113 | } 114 | -------------------------------------------------------------------------------- /component-library/checkpoint/pull_asset.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/checkpoint/pull_asset.yaml -------------------------------------------------------------------------------- /component-library/checkpoint/store_asset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "91cb31a0-41b2-4760-9297-b619f8b47736", 6 | "metadata": { 7 | "jp-MarkdownHeadingCollapsed": true, 8 | "tags": [] 9 | }, 10 | "source": [ 11 | "# store_asset" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "id": "4d89ae40-6a75-4d6d-91f5-ced21c1f94e9", 17 | "metadata": {}, 18 | "source": [ 19 | "Stores an asset (file) to S3 compliant COS (Cloud Object Store) to provide it to subsequent stages to avoid re-processing of data.\n", 20 | "\n", 21 | "WARNING: currently only minio supported" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "id": "6a28bc37-3944-4e5c-b8a0-e66be078ae4a", 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "!pip install wget==3.2" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "id": "stainless-purpose", 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "import wget\n", 42 | "wget.download(\n", 43 | " 'https://raw.githubusercontent.com/IBM/claimed/master/component-library/claimed_utils.py'\n", 44 | ")\n", 45 | "from claimed_utils import parse_args_to_parameters\n", 46 | "from io import BytesIO\n", 47 | "from minio import Minio\n", 48 | "import os\n", 49 | "import os.path" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "id": "tired-access", 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "# checkpoint_ip minio endpoint\n", 60 | "checkpoint_ip = os.environ.get('checkpoint_ip')\n", 61 | "\n", 62 | "# checkpoint_user minio user\n", 63 | "checkpoint_user = os.environ.get('checkpoint_user', 'minio')\n", 64 | "\n", 65 | "# checkpoint_pass minio pw\n", 66 | "checkpoint_pass = os.environ.get('checkpoint_pass', 'minio123')\n", 67 | "\n", 68 | "# checkpoint_bucket minio bucket\n", 69 | "checkpoint_bucket = os.environ.get('checkpoint_bucket', 'checkpoint')\n", 70 | "\n", 71 | "# name of asset to pull\n", 72 | "asset_name = os.environ.get('asset_name')\n", 73 | "\n", 74 | "parse_args_to_parameters()" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "id": "bound-tuning", 81 | "metadata": { 82 | "tags": [] 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "client = Minio(checkpoint_ip, checkpoint_user, checkpoint_pass, secure=False)\n", 87 | "\n", 88 | "size = os.path.getsize(asset_name)\n", 89 | "with open(asset_name, 'rb') as fh:\n", 90 | " buf = BytesIO(fh.read())\n", 91 | " result = client.put_object(\n", 92 | " checkpoint_bucket, asset_name, buf, length=size\n", 93 | " )" 94 | ] 95 | } 96 | ], 97 | "metadata": { 98 | "kernelspec": { 99 | "display_name": "Python 3 (ipykernel)", 100 | "language": "python", 101 | "name": "python3" 102 | }, 103 | "language_info": { 104 | "codemirror_mode": { 105 | "name": "ipython", 106 | "version": 3 107 | }, 108 | "file_extension": ".py", 109 | "mimetype": "text/x-python", 110 | "name": "python", 
111 | "nbconvert_exporter": "python", 112 | "pygments_lexer": "ipython3", 113 | "version": "3.8.10" 114 | } 115 | }, 116 | "nbformat": 4, 117 | "nbformat_minor": 5 118 | } 119 | -------------------------------------------------------------------------------- /component-library/checkpoint/store_asset.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/checkpoint/store_asset.yaml -------------------------------------------------------------------------------- /component-library/claimed_utils.py: -------------------------------------------------------------------------------- 1 | # Utilities used by CLAIMED 2 | 3 | import os 4 | import re 5 | import sys 6 | import zipfile 7 | 8 | # compresses 'path' into 'target' zipfile 9 | 10 | 11 | def zipdir(target, path): 12 | with zipfile.ZipFile(target, 'w', zipfile.ZIP_DEFLATED) as zipf: 13 | for root, dirs, files in os.walk(path): 14 | for file in files: 15 | zipf.write(os.path.join(root, file)) 16 | 17 | # uncompresses 'zipfile_name' to 'target' directory 18 | 19 | 20 | def unzip(target, zipfile_name): 21 | with zipfile.ZipFile(zipfile_name, 'r') as zip_ref: 22 | zip_ref.extractall(target) 23 | 24 | def parse_args_to_parameters(): 25 | # override parameters received from a potential call using %run magic 26 | parameters = list( 27 | map( 28 | lambda s: re.sub('$', '"', s), 29 | map( 30 | lambda s: s.replace('=', '="'), 31 | filter( 32 | lambda s: s.find('=') > -1 and s.find('[A-Za-z0-9_]*=[.\/A-Za-z0-9]*') > -1, 33 | sys.argv 34 | ) 35 | ) 36 | ) 37 | ) -------------------------------------------------------------------------------- /component-library/deploy/README.md: -------------------------------------------------------------------------------- 1 | 18 | This folder contains components related to deployment of machine and deep learning models. 19 | 20 | It currently supports deployment to IBM Watson Machine Learning. Support for KFServing is in progress. 21 | 22 | In addition, helper components for deployment also reside in this folder. For example, a component responsible for model blessing to decide if a model can go into production based on metrics upstream components computed. 23 | -------------------------------------------------------------------------------- /component-library/deploy/condition-blessing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "73c0482f-cf5c-4cb7-ab1e-4e6416a7b57a", 6 | "metadata": {}, 7 | "source": [ 8 | "# model-blessing" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "brutal-catch", 14 | "metadata": {}, 15 | "source": [ 16 | "Assesses metrics of an asset (ML model, dashboard, report, ...) and decides on readyness for production deployment. \n", 17 | "\n", 18 | "Currently, this is only a placeholder notebook which need to be extended. 
In the near future we are planning to come up with a baseline implementation which works for the average cases\n" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "1d802d6e-20cb-4792-9b50-e355a27700f1", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "!pip3 install wget==3.2" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "id": "2d7f1bc5-b134-4d1e-80d8-9bb3709c0a23", 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "import os\n", 39 | "import wget" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "id": "9a7b0b8e-7236-4e47-8b3e-3d44bfdf17a7", 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "# expression in python to be evaluated\n", 50 | "expression = os.environ.get('expression')\n", 51 | "\n", 52 | "# output_result_filename (default: result.txt)\n", 53 | "output_result_filename = os.environ.get('output_result_filename', 'result.txt')" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "id": "gentle-memorabilia", 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "# @returns result file containing TRUE/FALSE in the 1st line\n", 64 | "# and additional information\n", 65 | "# on the decision made in plain text in the subsequent lines" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "id": "undefined-leave", 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "# Your implmementation here\n", 76 | "print('not yet implemented')" 77 | ] 78 | } 79 | ], 80 | "metadata": { 81 | "kernelspec": { 82 | "display_name": "Python 3 (ipykernel)", 83 | "language": "python", 84 | "name": "python3" 85 | }, 86 | "language_info": { 87 | "codemirror_mode": { 88 | "name": "ipython", 89 | "version": 3 90 | }, 91 | "file_extension": ".py", 92 | "mimetype": "text/x-python", 93 | "name": "python", 94 | "nbconvert_exporter": "python", 95 | "pygments_lexer": "ipython3", 96 | "version": "3.10.6" 97 | } 98 | }, 99 | "nbformat": 4, 100 | "nbformat_minor": 5 101 | } 102 | -------------------------------------------------------------------------------- /component-library/deploy/condition-blessing.yaml: -------------------------------------------------------------------------------- 1 | name: model-blessing 2 | description: Assesses metrics of an asset (ML model, dashboard, report, ...) and decides on readyness for production deployment. 
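# Illustrative note, not part of the original component spec: the 'expression'
# input is intended to hold a Python expression evaluated against metrics computed
# by upstream components, e.g. expression="accuracy > 0.9" (the metric name is a
# hypothetical example); the result file then starts with TRUE or FALSE accordingly.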
3 | 4 | 5 | inputs: 6 | - {name: expression, type: String, description: 'expression in python to be evaluated'} 7 | 8 | 9 | outputs: 10 | - {name: output_result_filename, type: String, description: 'output_result_filename (default: result.txt)'} 11 | 12 | 13 | implementation: 14 | container: 15 | image: romeokienzler/claimed-model-blessing:0.1 16 | command: 17 | - sh 18 | - -ec 19 | - | 20 | python ./condition-blessing.py output_result_filename="$0" expression="$1" 21 | - {outputPath: output_result_filename} 22 | - {inputValue: expression} 23 | -------------------------------------------------------------------------------- /component-library/deploy/deploy-kfserving.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Deploys model to KFServing\n", 8 | "Note: As this is running inside a KF Pipeline you need to grant additional permissions:\n", 9 | "\n", 10 | "kubectl create clusterrolebinding pipeline-runner-extend --clusterrole cluster-admin --serviceaccount=kubeflow:pipeline-runner (Kudos to Tommy Li https://github.com/Tomcli)\n", 11 | "\n", 12 | "and also inject the storage initializer into the control plane namespace\n", 13 | "https://github.com/kubeflow/kfserving/issues/1389" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "!pip3 install kfp-tekton\n", 23 | "!pip3 install kfserving==0.4.1" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "from kfserving import constants\n", 33 | "from kfserving import KFServingClient\n", 34 | "from kfserving import utils\n", 35 | "from kfserving import V1alpha2EndpointSpec\n", 36 | "from kfserving import V1alpha2InferenceService\n", 37 | "from kfserving import V1alpha2InferenceServiceSpec\n", 38 | "from kfserving import V1alpha2PredictorSpec\n", 39 | "from kfserving import V1alpha2TensorflowSpec\n", 40 | "from kubernetes import client\n", 41 | "from kubernetes.client import V1ResourceRequirements\n", 42 | "import os" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "# @param model_name How the model should be named on KFServing\n", 52 | "# @param model_uri URI where the model can be downloaded from" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "model_name = os.environ.get('model_name')\n", 62 | "model_uri = os.environ.get('model_uri')" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "constants.KFSERVING_VERSION" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "Define namespace where InferenceService needs to be deployed to. If not specified, below function defines namespace to the current one where SDK is running in the cluster, otherwise it will deploy to default namespace." 
79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "namespace = utils.get_default_target_namespace()" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "## Define InferenceService" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "Firstly define default endpoint spec, and then define the inferenceservice basic on the endpoint spec." 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION\n", 111 | "default_endpoint_spec = V1alpha2EndpointSpec(\n", 112 | " predictor=V1alpha2PredictorSpec(\n", 113 | " tensorflow=V1alpha2TensorflowSpec(\n", 114 | " storage_uri=model_uri,\n", 115 | " resources=V1ResourceRequirements(\n", 116 | " requests={'cpu': '100m', 'memory': '1Gi'},\n", 117 | " limits={'cpu': '100m', 'memory': '1Gi'}))))\n", 118 | "\n", 119 | "isvc = V1alpha2InferenceService(\n", 120 | " api_version=api_version,\n", 121 | " kind=constants.KFSERVING_KIND,\n", 122 | " metadata=client.V1ObjectMeta(\n", 123 | " name='flower-sample',\n", 124 | " namespace=namespace\n", 125 | " ),\n", 126 | " spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)\n", 127 | ")" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "KFServing = KFServingClient()\n", 137 | "KFServing.delete(model_name, namespace=namespace)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "## Create InferenceService" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "Call KFServingClient to create InferenceService." 
152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "KFServing = KFServingClient()\n", 161 | "KFServing.create(isvc)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "## Check the InferenceService" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "KFServing.get(model_name, namespace=namespace, watch=True, timeout_seconds=120)" 178 | ] 179 | } 180 | ], 181 | "metadata": { 182 | "kernelspec": { 183 | "display_name": "Python 3", 184 | "language": "python", 185 | "name": "python3" 186 | }, 187 | "language_info": { 188 | "codemirror_mode": { 189 | "name": "ipython", 190 | "version": 3 191 | }, 192 | "file_extension": ".py", 193 | "mimetype": "text/x-python", 194 | "name": "python", 195 | "nbconvert_exporter": "python", 196 | "pygments_lexer": "ipython3", 197 | "version": "3.8.6" 198 | } 199 | }, 200 | "nbformat": 4, 201 | "nbformat_minor": 4 202 | } 203 | -------------------------------------------------------------------------------- /component-library/deploy/deploy_watson_machine_learning.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/deploy/deploy_watson_machine_learning.yaml -------------------------------------------------------------------------------- /component-library/deploy/deploy_wml_pmml.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Deploy Watson ML (PMML)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Deploys a PMML model to IBM Watson Machine Learning (WML)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "!pip install ibm-watson-machine-learning==1.0.45\n", 24 | "# PLEASE RESTART YOUR KERNAL AFTER THIS LINE HAS BEEN EXECUTED" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "from ibm_watson_machine_learning import APIClient\n", 34 | "import logging\n", 35 | "import os\n", 36 | "import re\n", 37 | "import sys" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# IBM Cloud API Key https://cloud.ibm.com/iam/apikeys\n", 47 | "api_key = os.environ.get('api_key', '')\n", 48 | "\n", 49 | "# Machine Learning Model Deployment Space https://dataplatform.cloud.ibm.com/ml-runtime/spaces\n", 50 | "space = os.environ.get('space', '')\n", 51 | "\n", 52 | "# IBM Cloud Region (e.g. 
us-south)\n", 53 | "location = os.environ.get('location', '')\n", 54 | "\n", 55 | "# temporary directory for data\n", 56 | "data_dir = os.environ.get('data_dir',\n", 57 | " '../../data/')" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "parameters = list(\n", 67 | " map(lambda s: re.sub('$', '\"', s),\n", 68 | " map(\n", 69 | " lambda s: s.replace('=', '=\"'),\n", 70 | " filter(\n", 71 | " lambda s: s.find('=') > -1 and bool(re.match(r'[A-Za-z0-9_]*=[.\\/A-Za-z0-9]*', s)),\n", 72 | " sys.argv\n", 73 | " )\n", 74 | " )))\n", 75 | "\n", 76 | "for parameter in parameters:\n", 77 | " logging.warning('Parameter: ' + parameter)\n", 78 | " exec(parameter)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "wml_credentials = {\n", 88 | " \"apikey\": api_key,\n", 89 | " \"url\": 'https://' + location + '.ml.cloud.ibm.com'\n", 90 | "}" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "client = APIClient(wml_credentials)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "o = client.software_specifications.get_uid_by_name('pmml-3.0_4.3')\n", 109 | "software_spec_uid = o\n", 110 | "client.set.default_space(space)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "model_meta_props = {\n", 120 | " client.repository.ModelMetaNames.NAME: 'test_pmml2',\n", 121 | " client.repository.ModelMetaNames.TYPE: \"pmml_4.2\",\n", 122 | " client.repository.ModelMetaNames.SOFTWARE_SPEC_UID: software_spec_uid\n", 123 | "}\n", 124 | "\n", 125 | "published_model = client.repository.store_model(\n", 126 | " model=data_dir + 'model.xml',\n", 127 | " meta_props=model_meta_props,\n", 128 | ")\n", 129 | "\n", 130 | "model_uid = client.repository.get_model_uid(published_model)" 131 | ] 132 | } 133 | ], 134 | "metadata": { 135 | "kernelspec": { 136 | "display_name": "Python 3", 137 | "language": "python", 138 | "name": "python3" 139 | }, 140 | "language_info": { 141 | "codemirror_mode": { 142 | "name": "ipython", 143 | "version": 3 144 | }, 145 | "file_extension": ".py", 146 | "mimetype": "text/x-python", 147 | "name": "python", 148 | "nbconvert_exporter": "python", 149 | "pygments_lexer": "ipython3", 150 | "version": "3.6.8" 151 | }, 152 | "papermill": { 153 | "default_parameters": {}, 154 | "duration": 55.042719, 155 | "end_time": "2021-01-28T16:00:26.871724", 156 | "environment_variables": {}, 157 | "exception": null, 158 | "input_path": "/home/jovyan/work/elyra-classification/train-trusted-ai.ipynb", 159 | "output_path": "/home/jovyan/work/elyra-classification/train-trusted-ai.ipynb", 160 | "parameters": {}, 161 | "start_time": "2021-01-28T15:59:31.829005", 162 | "version": "2.2.2" 163 | } 164 | }, 165 | "nbformat": 4, 166 | "nbformat_minor": 4 167 | } 168 | -------------------------------------------------------------------------------- /component-library/deploy/deploy_wml_pmml.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/deploy/deploy_wml_pmml.yaml 
-------------------------------------------------------------------------------- /component-library/examples/alert_for_content_in_url.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.2 2 | class: CommandLineTool 3 | 4 | baseCommand: "claimed" 5 | 6 | inputs: 7 | component: 8 | type: string 9 | default: romeokienzler/claimed-alert-for-content-in-url:0.6 10 | inputBinding: 11 | position: 1 12 | prefix: --component 13 | log_level: 14 | type: string 15 | default: "INFO" 16 | inputBinding: 17 | position: 2 18 | prefix: --log_level 19 | url_to_notify: 20 | type: string 21 | default: None 22 | inputBinding: 23 | position: 3 24 | prefix: --url_to_notify 25 | url_to_query: 26 | type: string 27 | default: None 28 | inputBinding: 29 | position: 4 30 | prefix: --url_to_query 31 | filter_content: 32 | type: string 33 | default: None 34 | inputBinding: 35 | position: 5 36 | prefix: --filter_content 37 | sleep: 38 | type: int 39 | default: 30 40 | inputBinding: 41 | position: 6 42 | prefix: --sleep 43 | 44 | 45 | outputs: [] 46 | -------------------------------------------------------------------------------- /component-library/examples/alert_for_content_in_url.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# alert_for_content_in_url" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Reqularly check if a URL has a specific content and if true call another url" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "!pip install requests" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import os\n", 33 | "import requests\n", 34 | "import time" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "url_to_notify = os.getenv('url_to_notify')\n", 44 | "url_to_query = os.getenv('url_to_query')\n", 45 | "filter_content = os.getenv('filter_content')\n", 46 | "sleep = int(os.getenv('sleep','30'))" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "while True:\n", 56 | " r = requests.get(url_to_query)\n", 57 | " lines = r.text.split('\\n')\n", 58 | "\n", 59 | " for line in lines:\n", 60 | " if filter_content in line:\n", 61 | " print(requests.get(url_to_notify).text)\n", 62 | " break\n", 63 | " time.sleep(sleep)" 64 | ] 65 | } 66 | ], 67 | "metadata": { 68 | "kernelspec": { 69 | "display_name": ".venv", 70 | "language": "python", 71 | "name": "python3" 72 | }, 73 | "language_info": { 74 | "codemirror_mode": { 75 | "name": "ipython", 76 | "version": 3 77 | }, 78 | "file_extension": ".py", 79 | "mimetype": "text/x-python", 80 | "name": "python", 81 | "nbconvert_exporter": "python", 82 | "pygments_lexer": "ipython3", 83 | "version": "3.11.7" 84 | } 85 | }, 86 | "nbformat": 4, 87 | "nbformat_minor": 2 88 | } 89 | -------------------------------------------------------------------------------- /component-library/examples/alert_for_content_in_url.job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: alert-for-content-in-url 5 | spec: 6 
| template: 7 | spec: 8 | containers: 9 | - name: alert-for-content-in-url 10 | image: romeokienzler/claimed-alert-for-content-in-url:0.6 11 | workingDir: /opt/app-root/src/ 12 | command: ["/opt/app-root/bin/ipython","claimed_alert_for_content_in_url.ipynb"] 13 | env: 14 | - name: log_level 15 | value: value_of_log_level 16 | - name: url_to_notify 17 | value: value_of_url_to_notify 18 | - name: url_to_query 19 | value: value_of_url_to_query 20 | - name: filter_content 21 | value: value_of_filter_content 22 | - name: sleep 23 | value: value_of_sleep 24 | restartPolicy: OnFailure 25 | imagePullSecrets: 26 | - name: image_pull_secret -------------------------------------------------------------------------------- /component-library/examples/alert_for_content_in_url.yaml: -------------------------------------------------------------------------------- 1 | name: sleep 2 | description: "# alert_for_content_in_url Reqularly check if a URL has a specific content and if true call another url – CLAIMED V0.1" 3 | 4 | inputs: 5 | - {name: log_level, type: String, description: "update log level", default: "INFO"} 6 | - {name: url_to_notify, type: String, description: ""} 7 | - {name: url_to_query, type: String, description: ""} 8 | - {name: filter_content, type: String, description: ""} 9 | - {name: sleep, type: Integer, description: "", default: "30'"} 10 | 11 | 12 | outputs: 13 | 14 | 15 | implementation: 16 | container: 17 | image: romeokienzler/claimed-sleep:0.6 18 | command: 19 | - sh 20 | - -ec 21 | - | 22 | ipython ./claimed_alert_for_content_in_url.ipynb log_level="${0}" url_to_notify="${1}" url_to_query="${2}" filter_content="${3}" sleep="${4}" 23 | - {inputValue: log_level} 24 | - {inputValue: url_to_notify} 25 | - {inputValue: url_to_query} 26 | - {inputValue: filter_content} 27 | - {inputValue: sleep} 28 | -------------------------------------------------------------------------------- /component-library/examples/fibonacci.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.2 2 | class: CommandLineTool 3 | 4 | baseCommand: "claimed" 5 | 6 | inputs: 7 | component: 8 | type: string 9 | default: docker.io/mdorzweiler/claimed-fibonacci:0.1 10 | inputBinding: 11 | position: 1 12 | prefix: --component 13 | log_level: 14 | type: string 15 | default: "INFO" 16 | inputBinding: 17 | position: 2 18 | prefix: --log_level 19 | b: 20 | type: string 21 | default: None 22 | inputBinding: 23 | position: 3 24 | prefix: --b 25 | 26 | 27 | outputs: [] 28 | -------------------------------------------------------------------------------- /component-library/examples/fibonacci.job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: fibonacci 5 | spec: 6 | template: 7 | spec: 8 | containers: 9 | - name: fibonacci 10 | image: docker.io/mdorzweiler/claimed-fibonacci:0.1 11 | workingDir: /opt/app-root/src/ 12 | command: ["/opt/app-root/bin/python","claimed_fibonacci.py"] 13 | env: 14 | - name: log_level 15 | value: value_of_log_level 16 | - name: b 17 | value: value_of_b 18 | restartPolicy: OnFailure 19 | imagePullSecrets: 20 | - name: image_pull_secret -------------------------------------------------------------------------------- /component-library/examples/fibonacci.py: -------------------------------------------------------------------------------- 1 | def fib(b): 2 | n = int(b) 3 | if n == 0: 4 | return 0 5 | if n == 1: 6 | return 1 7 | return fib(n-2) + fib(n-1) 
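import os  # os.getenv() is used below, but os is not imported anywhere else in this script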
8 | 9 | b = os.getenv('b') 10 | print(fib(b)) 11 | -------------------------------------------------------------------------------- /component-library/examples/fibonacci.yaml: -------------------------------------------------------------------------------- 1 | name: fibonacci 2 | description: "claimed-fibonacci – CLAIMED V0.1" 3 | 4 | inputs: 5 | - {name: log_level, type: String, description: "update log level", default: "INFO"} 6 | - {name: b, type: String, description: ""} 7 | 8 | 9 | outputs: 10 | 11 | 12 | implementation: 13 | container: 14 | image: docker.io/mdorzweiler/claimed-fibonacci:0.1 15 | command: 16 | - sh 17 | - -ec 18 | - | 19 | python ./claimed_fibonacci.py log_level="${0}" b="${1}" 20 | - {inputValue: log_level} 21 | - {inputValue: b} 22 | -------------------------------------------------------------------------------- /component-library/examples/hello_world.job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: hello-world 5 | spec: 6 | parallelism: 100 7 | template: 8 | spec: 9 | containers: 10 | - name: hello-world 11 | image: docker.io/romeokienzler/claimed-hello-world:0.1 12 | command: ["/opt/app-root/bin/python","/opt/app-root/src/hello_world.py"] 13 | env: 14 | - name: log_level 15 | value: value_of_log_level 16 | - name: name 17 | value: value_of_name 18 | - name: place 19 | value: value_of_place 20 | - name: count 21 | value: value_of_count 22 | restartPolicy: OnFailure 23 | imagePullSecrets: 24 | - name: image_pull_secret -------------------------------------------------------------------------------- /component-library/examples/hello_world.yaml: -------------------------------------------------------------------------------- 1 | name: hello-world 2 | description: "# hello-world This is the description of hello world – CLAIMED V0.1" 3 | 4 | inputs: 5 | - {name: log_level, type: String, description: "update log level", default: "INFO"} 6 | - {name: name, type: String, description: "the name to greet (mandatory)"} 7 | - {name: place, type: String, description: "the place to greet (optional)", default: "World"} 8 | - {name: count, type: Integer, description: "the number of times to repeat the greeting (optional)", default: "'1'"} 9 | 10 | 11 | outputs: 12 | 13 | 14 | implementation: 15 | container: 16 | image: docker.io/romeokienzler/claimed-hello-world:0.1 17 | command: 18 | - sh 19 | - -ec 20 | - | 21 | python ./hello_world.py log_level="${0}" name="${1}" place="${2}" count="${3}" 22 | - {inputValue: log_level} 23 | - {inputValue: name} 24 | - {inputValue: place} 25 | - {inputValue: count} 26 | -------------------------------------------------------------------------------- /component-library/filter/README.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | This folder contains components related to filtering of structured data. In terms of Relational Algebra it's a [selection](https://en.wikipedia.org/wiki/Selection_(relational_algebra)). 
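A minimal standalone illustration of such a selection on a pandas data frame (made-up data, independent of the filter component that follows):

import pandas as pd

df = pd.DataFrame({'filename': ['a.csv', 'b.gz', 'c.csv']})
# keep only the rows whose filename does not end in .gz, i.e. a relational-algebra selection
selected = df[~df['filename'].str.endswith('.gz')]
print(selected)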
20 | -------------------------------------------------------------------------------- /component-library/filter/filter.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.2 2 | class: CommandLineTool 3 | 4 | baseCommand: "claimed" 5 | 6 | inputs: 7 | component: 8 | type: string 9 | default: romeokienzler/claimed-filter:0.6 10 | inputBinding: 11 | position: 1 12 | prefix: --component 13 | log_level: 14 | type: string 15 | default: "INFO" 16 | inputBinding: 17 | position: 2 18 | prefix: --log_level 19 | predicate: 20 | type: string 21 | default: None 22 | inputBinding: 23 | position: 3 24 | prefix: --predicate 25 | file_name: 26 | type: string 27 | default: None 28 | inputBinding: 29 | position: 4 30 | prefix: --file_name 31 | output_file_name: 32 | type: string 33 | default: None 34 | inputBinding: 35 | position: 5 36 | prefix: --output_file_name 37 | 38 | 39 | outputs: [] 40 | -------------------------------------------------------------------------------- /component-library/filter/filter.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "arabic-honey", 6 | "metadata": {}, 7 | "source": [ 8 | "# filter" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "ebe4a7f0", 14 | "metadata": {}, 15 | "source": [ 16 | "Filters rows based on predicate on pandas data frame\n", 17 | "Example \"predicate=~metadata.filename.str.contains('.gz') \" => filters all rows where column \"filename\" contains '.gz\"" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "id": "fabulous-israeli", 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "!pip install pandas" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "id": "d92abd54", 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import pandas as pd\n", 38 | "import os\n", 39 | "import logging\n", 40 | "logging.basicConfig(level=logging.DEBUG)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "upset-affair", 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# predicate (as described in documentation of the component)\n", 51 | "predicate = os.environ.get('predicate')\n", 52 | "\n", 53 | "# file name / path of the CSV file to read\n", 54 | "file_name = os.environ.get('file_name')\n", 55 | "\n", 56 | "# output file name / path of the CSV file to write\n", 57 | "output_file_name = os.environ.get('output_file_name')" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "id": "atmospheric-mauritius", 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "logging.debug('Opening file...')\n", 68 | "df = pd.read_csv(file_name)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "id": "excellent-conference", 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "logging.debug('Filtering...')\n", 79 | "exec('df = df[' + predicate + ']')" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "id": "parental-dialogue", 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "logging.debug('Writing file...')\n", 90 | "df.to_csv(output_file_name, index=False)\n", 91 | "logging.debug('Done')" 92 | ] 93 | } 94 | ], 95 | "metadata": { 96 | "kernelspec": { 97 | "display_name": "Python 3", 98 | "language": "python", 99 | "name": "python3" 100 | }, 101 | 
"language_info": { 102 | "codemirror_mode": { 103 | "name": "ipython", 104 | "version": 3 105 | }, 106 | "file_extension": ".py", 107 | "mimetype": "text/x-python", 108 | "name": "python", 109 | "nbconvert_exporter": "python", 110 | "pygments_lexer": "ipython3", 111 | "version": "3.10.12" 112 | } 113 | }, 114 | "nbformat": 4, 115 | "nbformat_minor": 5 116 | } 117 | -------------------------------------------------------------------------------- /component-library/filter/filter.job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: filter 5 | spec: 6 | template: 7 | spec: 8 | containers: 9 | - name: filter 10 | image: romeokienzler/claimed-filter:0.6 11 | workingDir: /opt/app-root/src/ 12 | command: ["/opt/app-root/bin/ipython","claimed_filter.ipynb"] 13 | env: 14 | - name: log_level 15 | value: value_of_log_level 16 | - name: predicate 17 | value: value_of_predicate 18 | - name: file_name 19 | value: value_of_file_name 20 | - name: output_file_name 21 | value: value_of_output_file_name 22 | restartPolicy: OnFailure 23 | imagePullSecrets: 24 | - name: image_pull_secret -------------------------------------------------------------------------------- /component-library/filter/filter.yaml: -------------------------------------------------------------------------------- 1 | name: output_file_name 2 | description: "# filter Filters rows based on predicate on pandas data frame – CLAIMED V0.1" 3 | 4 | inputs: 5 | - {name: log_level, type: String, description: "update log level", default: "INFO"} 6 | - {name: predicate, type: String, description: "predicate (as described in documentation of the component)"} 7 | - {name: file_name, type: String, description: "file name / path of the CSV file to read"} 8 | - {name: output_file_name, type: String, description: "output file name / path of the CSV file to write"} 9 | 10 | 11 | outputs: 12 | 13 | 14 | implementation: 15 | container: 16 | image: romeokienzler/claimed-output_file_name:0.6 17 | command: 18 | - sh 19 | - -ec 20 | - | 21 | ipython ./claimed_filter.ipynb log_level="${0}" predicate="${1}" file_name="${2}" output_file_name="${3}" 22 | - {inputValue: log_level} 23 | - {inputValue: predicate} 24 | - {inputValue: file_name} 25 | - {inputValue: output_file_name} 26 | -------------------------------------------------------------------------------- /component-library/filter/filter_docker.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | cwlVersion: v1.2 3 | class: CommandLineTool 4 | baseCommand: ["/opt/app-root/bin/ipython","/opt/app-root/src/filter.py"] 5 | hints: 6 | DockerRequirement: 7 | dockerPull: romeokienzler/claimed-filter:0.5 8 | inputs: 9 | log_level: 10 | type: string 11 | default: "INFO" 12 | inputBinding: 13 | position: 1 14 | prefix: --log_level 15 | predicate: 16 | type: string 17 | default: None 18 | inputBinding: 19 | position: 2 20 | prefix: --predicate 21 | file_name: 22 | type: string 23 | default: None 24 | inputBinding: 25 | position: 3 26 | prefix: --file_name 27 | output_file_name: 28 | type: string 29 | default: None 30 | inputBinding: 31 | position: 4 32 | prefix: --output_file_name 33 | 34 | outputs: 35 | dummy_out: 36 | type: File 37 | outputBinding: 38 | glob: query_result.csv -------------------------------------------------------------------------------- /component-library/generic-notebook-runner.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "53af08cf-0149-4369-93d6-fba2203ec6cc", 6 | "metadata": {}, 7 | "source": [ 8 | "# Run any notebook\n", 9 | "\n", 10 | "Pulls a notebook from an URL provided and runs it via ipython" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "9ee25986-e86d-4995-9a1d-0e2ea95e35ab", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import os\n", 21 | "#os.environ['create_image']='True'" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "id": "38ee13e4-dd59-4b52-982a-ea5ba3f6594e", 28 | "metadata": { 29 | "scrolled": true, 30 | "tags": [] 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "if bool(os.environ.get('create_image',False)):\n", 35 | " docker_file=\"\"\"\n", 36 | " FROM registry.access.redhat.com/ubi8/python-39\n", 37 | " RUN pip install ipython nbformat\n", 38 | " ADD generic-notebook-runner.ipynb /\n", 39 | " ENTRYPOINT [\"ipython\",\"/generic-notebook-runner.ipynb\"]\n", 40 | " \"\"\"\n", 41 | " with open(\"Dockerfile\", \"w\") as text_file:\n", 42 | " text_file.write(docker_file)\n", 43 | "\n", 44 | " !docker build -t generic-notebook-runner .\n", 45 | " !docker tag generic-notebook-runner romeokienzler/generic-notebook-runner\n", 46 | " !docker push romeokienzler/generic-notebook-runner\n", 47 | " exit(0)\n", 48 | "else:\n", 49 | " None" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "id": "60436d8a-f461-4723-abeb-cc22b555c782", 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "%%bash\n", 60 | "curl -o /tmp/notebook.ipynb $NOTEBOOK_URL\n", 61 | "ipython /tmp/notebook.ipynb" 62 | ] 63 | } 64 | ], 65 | "metadata": { 66 | "kernelspec": { 67 | "display_name": "Python 3 (ipykernel)", 68 | "language": "python", 69 | "name": "python3" 70 | }, 71 | "language_info": { 72 | "codemirror_mode": { 73 | "name": "ipython", 74 | "version": 3 75 | }, 76 | "file_extension": ".py", 77 | "mimetype": "text/x-python", 78 | "name": "python", 79 | "nbconvert_exporter": "python", 80 | "pygments_lexer": "ipython3", 81 | "version": "3.9.6" 82 | } 83 | }, 84 | "nbformat": 4, 85 | "nbformat_minor": 5 86 | } 87 | -------------------------------------------------------------------------------- /component-library/geo/gdal.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "id": "binding-delta", 7 | "metadata": { 8 | "papermill": { 9 | "duration": 0.016304, 10 | "end_time": "2021-03-22T20:29:23.476444", 11 | "exception": false, 12 | "start_time": "2021-03-22T20:29:23.460140", 13 | "status": "completed" 14 | }, 15 | "tags": [] 16 | }, 17 | "source": [ 18 | "# gdal" 19 | ] 20 | }, 21 | { 22 | "attachments": {}, 23 | "cell_type": "markdown", 24 | "id": "bb97e294-9399-4d96-a95c-8ad7e29a2872", 25 | "metadata": {}, 26 | "source": [ 27 | "Use Geospatial Data Abstraction Library (GDAL) for working with raster and vector data" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "id": "884bff3a-dc51-4c8b-a98f-1d0a8ac1de94", 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import os\n", 38 | "\n", 39 | "os.environ['create_image']='True'\n", 40 | "os.environ['repository']='docker.io/romeokienzler'\n", 41 | "os.environ['version']='0.2'" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | 
"execution_count": null, 47 | "id": "7f19a1a1-9cf2-4cb6-a0d0-859c9de3a525", 48 | "metadata": { 49 | "scrolled": true, 50 | "tags": [] 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "if bool(os.environ.get('create_image',False)):\n", 55 | " docker_file=\"\"\"\n", 56 | " FROM ubuntu\n", 57 | " RUN apt update && apt install -y python3-gdal python3-pip\n", 58 | " RUN apt update && apt install -y software-properties-common\n", 59 | " RUN add-apt-repository ppa:ubuntugis/ppa\n", 60 | " RUN apt update && apt install -y gdal-bin\n", 61 | " RUN pip install ipython nbformat\n", 62 | " \"\"\"\n", 63 | " with open(\"Dockerfile\", \"w\") as text_file:\n", 64 | " text_file.write(docker_file)\n", 65 | "\n", 66 | " !docker build -t claimed_gdal:`echo $version` .\n", 67 | " !docker tag claimed_gdal:`echo $version` `echo $repository`/claimed_gdal:`echo $version`\n", 68 | " !docker push `echo $repository`/claimed_gdal:`echo $version`\n", 69 | " !rm Dockerfile" 70 | ] 71 | } 72 | ], 73 | "metadata": { 74 | "kernelspec": { 75 | "display_name": "Python 3", 76 | "language": "python", 77 | "name": "python3" 78 | }, 79 | "language_info": { 80 | "codemirror_mode": { 81 | "name": "ipython", 82 | "version": 3 83 | }, 84 | "file_extension": ".py", 85 | "mimetype": "text/x-python", 86 | "name": "python", 87 | "nbconvert_exporter": "python", 88 | "pygments_lexer": "ipython3", 89 | "version": "3.11.1 (main, Dec 7 2022, 00:00:00) [GCC 12.2.1 20221121 (Red Hat 12.2.1-4)]" 90 | }, 91 | "papermill": { 92 | "default_parameters": {}, 93 | "duration": 470.538548, 94 | "end_time": "2021-03-22T20:37:13.369954", 95 | "environment_variables": {}, 96 | "exception": null, 97 | "input_path": "/home/jovyan/work/examples/pipelines/pairs/component-library/transform/spark-csv-to-parquet.ipynb", 98 | "output_path": "/home/jovyan/work/examples/pipelines/pairs/component-library/transform/spark-csv-to-parquet.ipynb", 99 | "parameters": {}, 100 | "start_time": "2021-03-22T20:29:22.831406", 101 | "version": "2.3.3" 102 | }, 103 | "vscode": { 104 | "interpreter": { 105 | "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" 106 | } 107 | } 108 | }, 109 | "nbformat": 4, 110 | "nbformat_minor": 5 111 | } 112 | -------------------------------------------------------------------------------- /component-library/input/README.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | This folder contains components connecting to a (remote) backend for data retrieval. An (incomplete) list of possible data source types is: 20 | 21 | - Relational Databases 22 | - NoSQL Databases 23 | - S3/Cloud Object Store 24 | - HTTP(s) endpoints 25 | - FTP servers 26 | - git repositories 27 | - ... 
28 | -------------------------------------------------------------------------------- /component-library/input/input-Xview-download.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.2 2 | class: CommandLineTool 3 | 4 | baseCommand: "claimed" 5 | 6 | inputs: 7 | component: 8 | type: string 9 | default: docker.io/mdorzweiler/claimed-input-xview-download:0.1 10 | inputBinding: 11 | position: 1 12 | prefix: --component 13 | log_level: 14 | type: string 15 | default: "INFO" 16 | inputBinding: 17 | position: 2 18 | prefix: --log_level 19 | username: 20 | type: string 21 | default: None 22 | inputBinding: 23 | position: 3 24 | prefix: --username 25 | password: 26 | type: string 27 | default: None 28 | inputBinding: 29 | position: 4 30 | prefix: --password 31 | move_to_dir: 32 | type: string 33 | default: None 34 | inputBinding: 35 | position: 5 36 | prefix: --move_to_dir 37 | chromedriver_path: 38 | type: string 39 | default: None 40 | inputBinding: 41 | position: 6 42 | prefix: --chromedriver_path 43 | max_download_time: 44 | type: string 45 | default: None 46 | inputBinding: 47 | position: 7 48 | prefix: --max_download_time 49 | label: 50 | type: string 51 | default: None 52 | inputBinding: 53 | position: 8 54 | prefix: --label 55 | 56 | 57 | outputs: [] 58 | -------------------------------------------------------------------------------- /component-library/input/input-Xview-download.job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: input-xview-download 5 | spec: 6 | template: 7 | spec: 8 | containers: 9 | - name: input-xview-download 10 | image: docker.io/mdorzweiler/claimed-input-xview-download:0.1 11 | workingDir: /opt/app-root/src/ 12 | command: ["/opt/app-root/bin/ipython","claimed_input-Xview-download.ipynb"] 13 | env: 14 | - name: log_level 15 | value: value_of_log_level 16 | - name: username 17 | value: value_of_username 18 | - name: password 19 | value: value_of_password 20 | - name: move_to_dir 21 | value: value_of_move_to_dir 22 | - name: chromedriver_path 23 | value: value_of_chromedriver_path 24 | - name: max_download_time 25 | value: value_of_max_download_time 26 | - name: label 27 | value: value_of_label 28 | restartPolicy: OnFailure 29 | imagePullSecrets: 30 | - name: image_pull_secret -------------------------------------------------------------------------------- /component-library/input/input-Xview-download.yaml: -------------------------------------------------------------------------------- 1 | name: input-xview-download 2 | description: "## Xview Dataset Download – CLAIMED V0.1" 3 | 4 | inputs: 5 | - {name: log_level, type: String, description: "update log level", default: "INFO"} 6 | - {name: username, type: String, description: "username for the Xview webpage to authorize login"} 7 | - {name: password, type: String, description: "password for the Xview webpage to authorize login"} 8 | - {name: move_to_dir, type: String, description: "move_to_dir the directory where the dataset should be saved"} 9 | - {name: chromedriver_path, type: String, description: "chromedriver_path the directory where the local copy of chromedriver is saved"} 10 | - {name: max_download_time, type: String, description: "max_download_time before timeout, must be ajusted acording to the file size and internet speed"} 11 | - {name: label, type: String, description: "standing for TI=Traning Images, TL=Training Lables, VI=Validation Images"} 12 | 
13 | 14 | outputs: 15 | 16 | 17 | implementation: 18 | container: 19 | image: docker.io/mdorzweiler/claimed-input-xview-download:0.1 20 | command: 21 | - sh 22 | - -ec 23 | - | 24 | ipython ./claimed_input-Xview-download.ipynb log_level="${0}" username="${1}" password="${2}" move_to_dir="${3}" chromedriver_path="${4}" max_download_time="${5}" label="${6}" 25 | - {inputValue: log_level} 26 | - {inputValue: username} 27 | - {inputValue: password} 28 | - {inputValue: move_to_dir} 29 | - {inputValue: chromedriver_path} 30 | - {inputValue: max_download_time} 31 | - {inputValue: label} 32 | -------------------------------------------------------------------------------- /component-library/input/input-climate-copernicus.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/input/input-climate-copernicus.yaml -------------------------------------------------------------------------------- /component-library/input/input-codenet-LangClass.yaml: -------------------------------------------------------------------------------- 1 | name: Pulls Codenet classification data from the ml-exchange.org 2 | description: Pulls Codenet classification data.zip from the ml-exchange.org in a form ready for text classification in the folowing format: 3 | 4 | 5 | inputs: 6 | - {name: data_dir, type: String, description: 'temporal data storage for local execution'} 7 | 8 | 9 | outputs: 10 | - {name: output_filename, type: String, description: 'file name for training data zip'} 11 | 12 | 13 | implementation: 14 | container: 15 | image: romeokienzler/claimed-Pulls Codenet classification data from the ml-exchange.org:0.1 16 | command: 17 | - sh 18 | - -ec 19 | - | 20 | python ./input-codenet-LangClass.py output_filename="$0" data_dir="$1" 21 | - {outputPath: output_filename} 22 | - {inputValue: data_dir} 23 | -------------------------------------------------------------------------------- /component-library/input/input-covid-chestxray.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/input/input-covid-chestxray.yaml -------------------------------------------------------------------------------- /component-library/input/input-from-mongodb.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Input From MongoDB\n", 9 | "This notebook pulls data set from [MongoDB Atlas -- The Cloud-Native Database](https://www.mongodb.com/cloud/atlas/lp/try4?utm_source=google&utm_campaign=search_gs_pl_evergreen_atlas_core-high-int_prosp-brand_gic-null_emea-ie_ps-all_desktop_eng_lead&utm_term=mongodb%20atlas&utm_medium=cpc_paid_search&utm_ad=e&utm_ad_campaign_id=19630910055&adgroup=145923638859&cq_cmp=19630910055&gad=1&gclid=Cj0KCQjwsIejBhDOARIsANYqkD2ewYCphoJvBv_op03rwncSNye4Mq_RihH9EaZKsnFK4BV-roOCOrYaAuSREALw_wcB) database\n", 10 | "- first connect with mongo with url\n", 11 | "- then test the connection\n", 12 | "- save retrieved data into csv file" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "!pip install pymongo, dnspython" 22 | ] 23 | }, 24 | { 25 | "cell_type": 
"code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from pymongo import MongoClient\n", 31 | "import csv\n", 32 | "import os\n", 33 | "import logging\n", 34 | "import sys\n", 35 | "import re" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "# database username\n", 45 | "database_username = os.environ.get('database_username')\n", 46 | "\n", 47 | "# database password\n", 48 | "password = os.environ.get('password')\n", 49 | "\n", 50 | "# cluster url\n", 51 | "cluster_url = os.environ.get('cluster_url')\n", 52 | "\n", 53 | "# database name\n", 54 | "database = os.environ.get('database')\n", 55 | "\n", 56 | "# database collection\n", 57 | "collection = os.environ.get('collection')\n", 58 | "\n", 59 | "# database document name\n", 60 | "document = os.environ.get('document')\n", 61 | "\n", 62 | "# path and file name for output\n", 63 | "output_data_csv = os.environ.get('output_data_csv')\n" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "parameters = list(\n", 73 | " map(lambda s: re.sub('$', '\"', s),\n", 74 | " map(\n", 75 | " lambda s: s.replace('=', '=\"'),\n", 76 | " filter(\n", 77 | " lambda s: s.find('=') > -1 and bool(re.match(r'[A-Za-z0-9_]*=[.\\/A-Za-z0-9]*', s)),\n", 78 | " sys.argv\n", 79 | " )\n", 80 | " )))\n", 81 | "\n", 82 | "for parameter in parameters:\n", 83 | " logging.warning('Parameter: ' + parameter)\n", 84 | " exec(parameter)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "# MongoDB connection settings, Connection URL. 
Where your mongodb server is running.\n", 94 | "url = f\"mongodb+srv://{database_username}:{password}@{cluster_url}/test?retryWrites=true&w=majority\"\n", 95 | "\n", 96 | "# Connect to MongoDB\n", 97 | "client = MongoClient(url)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "# MongoDB connection testing, Access specific database\n", 107 | "db = client[database]\n", 108 | "\n", 109 | "# Access specific collection\n", 110 | "collection = db[collection]\n", 111 | "\n", 112 | "# Retrieve a document from the collection\n", 113 | "document = collection.find_one(document)\n", 114 | "\n", 115 | "# Print/Show the document\n", 116 | "print(document)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "# Save the document, Open the file in write mode\n", 126 | "with open(output_data_csv, 'w', newline='') as file:\n", 127 | " # Create a CSV writer object\n", 128 | " writer = csv.writer(file)\n", 129 | "\n", 130 | " # Write the document data to the CSV file\n", 131 | " writer.writerow(document.keys()) # Write the header row\n", 132 | " writer.writerow(document.values()) # Write the data row\n", 133 | "\n", 134 | "# Close the file\n", 135 | "file.close()\n", 136 | "\n", 137 | "print(\"Output saved successfully to CSV!\")" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "#finally close the connection\n", 147 | "client.close()\n", 148 | "print(\"Output saved successfully to CSV!\")" 149 | ] 150 | } 151 | ], 152 | "metadata": { 153 | "kernelspec": { 154 | "display_name": "base", 155 | "language": "python", 156 | "name": "python3" 157 | }, 158 | "language_info": { 159 | "codemirror_mode": { 160 | "name": "ipython", 161 | "version": 3 162 | }, 163 | "file_extension": ".py", 164 | "mimetype": "text/x-python", 165 | "name": "python", 166 | "nbconvert_exporter": "python", 167 | "pygments_lexer": "ipython3", 168 | "version": "3.10.9" 169 | }, 170 | "orig_nbformat": 4 171 | }, 172 | "nbformat": 4, 173 | "nbformat_minor": 2 174 | } 175 | -------------------------------------------------------------------------------- /component-library/input/input-hmp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/input/input-hmp.yaml -------------------------------------------------------------------------------- /component-library/input/input-pardata.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Input pardata (formerly pydax)\n", 8 | "Pulls data from patdata repository (former IBM DAX) \n", 9 | "Currently hardcoded to jfk data set" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "!pip install pardata==0.3.0" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import os\n", 28 | "import pardata\n", 29 | "import re\n", 30 | "import sys" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": 
[], 38 | "source": [ 39 | "# path and file name for output (default: data.csv)\n", 40 | "data_csv = os.environ.get('data_csv', 'data.csv')\n", 41 | "\n", 42 | "# temporal data storage for local execution\n", 43 | "data_dir = os.environ.get('data_dir', '../../data/')" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "# override parameters received from a potential call using %run magic\n", 53 | "parameters = list(\n", 54 | " map(\n", 55 | " lambda s: re.sub('$', '\"', s),\n", 56 | " map(\n", 57 | " lambda s: s.replace('=', '=\"'),\n", 58 | " filter(\n", 59 | " lambda s: s.find('=') > -1,\n", 60 | " sys.argv\n", 61 | " )\n", 62 | " )\n", 63 | " )\n", 64 | ")\n", 65 | "\n", 66 | "for parameter in parameters:\n", 67 | " exec(parameter)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "df = pardata.load_dataset('noaa_jfk')['jfk_weather_cleaned']" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "df.to_csv(data_dir + data_csv, index=False)" 86 | ] 87 | } 88 | ], 89 | "metadata": { 90 | "kernelspec": { 91 | "display_name": "Python 3", 92 | "language": "python", 93 | "name": "python3" 94 | }, 95 | "language_info": { 96 | "codemirror_mode": { 97 | "name": "ipython", 98 | "version": 3 99 | }, 100 | "file_extension": ".py", 101 | "mimetype": "text/x-python", 102 | "name": "python", 103 | "nbconvert_exporter": "python", 104 | "pygments_lexer": "ipython3", 105 | "version": "3.8.6" 106 | } 107 | }, 108 | "nbformat": 4, 109 | "nbformat_minor": 4 110 | } 111 | -------------------------------------------------------------------------------- /component-library/input/input-pardata.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/input/input-pardata.yaml -------------------------------------------------------------------------------- /component-library/input/input-postgresql.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Input Postgresql" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This component pulls data from a postgresql database as CSV on a given SQL statement. Parameters like\n", 15 | "host, database, user, password and sql need to be set. Please note that data is processed in-memory (pandas) and can't spill on disk (spark) yet. Therefore, the queried data must fit onto main memory (of the POD in case running within KubeFlow context." 
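Where the result set is too large for memory, one hedged work-around (a sketch only, not part of this component) is to stream the query in chunks with pandas and append each chunk to the output CSV:

import os
import pandas as pd
import psycopg2

conn = psycopg2.connect(host=os.environ.get('host'),
                        database=os.environ.get('database'),
                        user=os.environ.get('user'),
                        password=os.environ.get('password'),
                        port=int(os.environ.get('port', 5432)))

output_path = os.environ.get('data_dir', '../../data/') + os.environ.get('output_data_csv', 'data.csv')

# chunksize makes read_sql_query return an iterator of partial data frames,
# so only one chunk has to fit into memory at a time
first_chunk = True
for chunk in pd.read_sql_query(os.environ.get('sql'), conn, chunksize=100000):
    chunk.to_csv(output_path, mode='w' if first_chunk else 'a', header=first_chunk, index=False)
    first_chunk = False

conn.close()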
16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "!pip install psycopg2-binary==2.9.1 pandas==1.3.1" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "import os\n", 34 | "import pandas as pd\n", 35 | "import psycopg2\n", 36 | "import re\n", 37 | "import sys" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# path and file name for output\n", 47 | "output_data_csv = os.environ.get('output_data_csv', 'data.csv')\n", 48 | "\n", 49 | "# hostname of database server\n", 50 | "host = os.environ.get('host')\n", 51 | "\n", 52 | "# database name\n", 53 | "database = os.environ.get('database')\n", 54 | "\n", 55 | "# db user\n", 56 | "user = os.environ.get('user')\n", 57 | "\n", 58 | "# db password\n", 59 | "password = os.environ.get('password')\n", 60 | "\n", 61 | "# db port\n", 62 | "port = int(os.environ.get('port', 5432))\n", 63 | "\n", 64 | "# sql query statement to be executed\n", 65 | "sql = os.environ.get('sql')\n", 66 | "\n", 67 | "# temporal data storage for local execution\n", 68 | "data_dir = os.environ.get('data_dir', '../../data/')" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "# override parameters received from a potential call using %run magic\n", 78 | "parameters = list(\n", 79 | " map(\n", 80 | " lambda s: re.sub('$', '\"', s),\n", 81 | " map(\n", 82 | " lambda s: s.replace('=', '=\"'),\n", 83 | " filter(\n", 84 | " lambda s: s.find('=') > -1,\n", 85 | " sys.argv\n", 86 | " )\n", 87 | " )\n", 88 | " )\n", 89 | ")\n", 90 | "\n", 91 | "for parameter in parameters:\n", 92 | " exec(parameter)\n", 93 | "\n", 94 | "# cast parameters to appropriate type\n", 95 | "port = int(port)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "print('Logging configuration parameters...')\n", 105 | "print(output_data_csv)\n", 106 | "print(host)\n", 107 | "print(database)\n", 108 | "print(user)\n", 109 | "print(password)\n", 110 | "print(port)\n", 111 | "print(sql)\n", 112 | "print(data_dir)\n", 113 | "print('...done')" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "conn = psycopg2.connect(\n", 123 | " host=host,\n", 124 | " database=database,\n", 125 | " user=user,\n", 126 | " password=password,\n", 127 | " port=port\n", 128 | ")\n", 129 | "print('Connection successfull')" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "d = pd.read_sql_query(sql, conn)\n", 139 | "print('Query successfull')" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "conn.close()" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "d.to_csv(data_dir + output_data_csv, index=False)\n", 158 | "print('Data written successfully')" 159 | ] 160 | } 161 | ], 162 | "metadata": { 163 | "kernelspec": { 164 | "display_name": "Python 3 (ipykernel)", 165 | "language": "python", 166 | 
"name": "python3" 167 | }, 168 | "language_info": { 169 | "codemirror_mode": { 170 | "name": "ipython", 171 | "version": 3 172 | }, 173 | "file_extension": ".py", 174 | "mimetype": "text/x-python", 175 | "name": "python", 176 | "nbconvert_exporter": "python", 177 | "pygments_lexer": "ipython3", 178 | "version": "3.9.6" 179 | } 180 | }, 181 | "nbformat": 4, 182 | "nbformat_minor": 4 183 | } 184 | -------------------------------------------------------------------------------- /component-library/input/input-postgresql.yaml: -------------------------------------------------------------------------------- 1 | name: Input Postgresql 2 | description: This component pulls data from a postgresql database as CSV on a given SQL statement. Parameters like 3 | 4 | 5 | inputs: 6 | - {name: host, type: String, description: 'hostname of database server'} 7 | - {name: database, type: String, description: 'database name'} 8 | - {name: user, type: String, description: 'db user'} 9 | - {name: password, type: String, description: 'db password'} 10 | - {name: port, type: Integer, description: 'db port'} 11 | - {name: sql, type: String, description: 'sql query statement to be executed'} 12 | - {name: data_dir, type: String, description: 'temporal data storage for local execution'} 13 | 14 | 15 | outputs: 16 | - {name: output_data_csv, type: String, description: 'path and file name for output'} 17 | 18 | 19 | implementation: 20 | container: 21 | image: romeokienzler/claimed-Input Postgresql:0.1 22 | command: 23 | - sh 24 | - -ec 25 | - | 26 | python ./input-postgresql.py output_data_csv="$0" host="$1" database="$2" user="$3" password="$4" port="$5" sql="$6" data_dir="$7" 27 | - {outputPath: output_data_csv} 28 | - {inputValue: host} 29 | - {inputValue: database} 30 | - {inputValue: user} 31 | - {inputValue: password} 32 | - {inputValue: port} 33 | - {inputValue: sql} 34 | - {inputValue: data_dir} 35 | -------------------------------------------------------------------------------- /component-library/input/input-rki-covid19-deaths.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "60b7f9ad-5260-46e7-915b-11c47814c6e2", 7 | "metadata": { 8 | "papermill": { 9 | "duration": 7.478533, 10 | "end_time": "2022-01-10T17:23:46.955316", 11 | "exception": false, 12 | "start_time": "2022-01-10T17:23:39.476783", 13 | "status": "completed" 14 | }, 15 | "tags": [] 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "!pip install pandas==1.3.5 openpyxl==3.0.9" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "id": "c2498ac1-01dc-475a-b1d3-bad3743747db", 26 | "metadata": { 27 | "papermill": { 28 | "duration": 0.470443, 29 | "end_time": "2022-01-10T17:23:47.442372", 30 | "exception": false, 31 | "start_time": "2022-01-10T17:23:46.971929", 32 | "status": "completed" 33 | }, 34 | "tags": [] 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "import pandas as pd" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "id": "665cb7d2-7a89-4c6c-9956-81b9c1260e14", 45 | "metadata": { 46 | "papermill": { 47 | "duration": 0.608799, 48 | "end_time": "2022-01-10T17:23:48.068080", 49 | "exception": false, 50 | "start_time": "2022-01-10T17:23:47.459281", 51 | "status": "completed" 52 | }, 53 | "tags": [] 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "storage_options = {'User-Agent': 'Mozilla/5.0'}\n", 58 | "df = 
pd.read_excel('https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Projekte_RKI/COVID-19_Todesfaelle.xlsx?__blob=publicationFile', storage_options=storage_options, sheet_name=2)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "id": "8e78ac6e-85df-4b25-8f9d-189bfa6c95cf", 65 | "metadata": { 66 | "papermill": { 67 | "duration": 0.030524, 68 | "end_time": "2022-01-10T17:23:48.113731", 69 | "exception": false, 70 | "start_time": "2022-01-10T17:23:48.083207", 71 | "status": "completed" 72 | }, 73 | "tags": [] 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "df.to_csv('../../data/data.csv', index=False)" 78 | ] 79 | } 80 | ], 81 | "metadata": { 82 | "kernelspec": { 83 | "display_name": "Python 3 (ipykernel)", 84 | "language": "python", 85 | "name": "python3" 86 | }, 87 | "language_info": { 88 | "codemirror_mode": { 89 | "name": "ipython", 90 | "version": 3 91 | }, 92 | "file_extension": ".py", 93 | "mimetype": "text/x-python", 94 | "name": "python", 95 | "nbconvert_exporter": "python", 96 | "pygments_lexer": "ipython3", 97 | "version": "3.9.6" 98 | }, 99 | "papermill": { 100 | "default_parameters": {}, 101 | "duration": 10.086186, 102 | "end_time": "2022-01-10T17:23:48.436357", 103 | "environment_variables": {}, 104 | "exception": null, 105 | "input_path": "/home/romeokienzler/gitco/claimed/component-library/input/input-rki-covid19-deaths.ipynb", 106 | "output_path": "/home/romeokienzler/gitco/claimed/component-library/input/input-rki-covid19-deaths.ipynb", 107 | "parameters": {}, 108 | "start_time": "2022-01-10T17:23:38.350171", 109 | "version": "2.3.3" 110 | } 111 | }, 112 | "nbformat": 4, 113 | "nbformat_minor": 5 114 | } 115 | -------------------------------------------------------------------------------- /component-library/input/input-url.yaml: -------------------------------------------------------------------------------- 1 | name: input-url 2 | description: This component reads a file from a HTTP(s) source via wget CLAIMED v0.2n 3 | 4 | inputs: 5 | - {name: url, type: String, description: 'url of souce'} 6 | - {name: data_dir, type: String, description: 'temporal data storage for local execution'} 7 | 8 | 9 | outputs: 10 | - {name: output_data, type: OutputPath, description: 'path and file name for output'} 11 | 12 | 13 | implementation: 14 | container: 15 | image: romeokienzler/claimed-input-url:0.2n 16 | command: 17 | - sh 18 | - -ec 19 | - | 20 | ipython ./input-url.ipynb output_data="$0" url="$1" data_dir="$2" 21 | - {outputPath: output_data} 22 | - {inputValue: url} 23 | - {inputValue: data_dir} 24 | -------------------------------------------------------------------------------- /component-library/input/input-webcam.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "d00fc083", 6 | "metadata": { 7 | "papermill": { 8 | "duration": 0.016307, 9 | "end_time": "2022-01-10T17:05:19.160432", 10 | "exception": false, 11 | "start_time": "2022-01-10T17:05:19.144125", 12 | "status": "completed" 13 | }, 14 | "tags": [] 15 | }, 16 | "source": [ 17 | "# Input Webcam" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "id": "645dc2c4", 23 | "metadata": { 24 | "papermill": { 25 | "duration": 0.015642, 26 | "end_time": "2022-01-10T17:05:19.193671", 27 | "exception": false, 28 | "start_time": "2022-01-10T17:05:19.178029", 29 | "status": "completed" 30 | }, 31 | "tags": [] 32 | }, 33 | "source": [ 34 | "Creates video stream from webcam and 
forwards each frame as PNG one by one to http endpoint via POST" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "id": "166a39dd-a553-4b2b-be0a-0e6e1fdab6fe", 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "import os\n", 45 | "\"\"\"\n", 46 | "os.environ['create_image']='True'\n", 47 | "os.environ['repository']='romeokienzler'\n", 48 | "os.environ['version']='0.14'\n", 49 | "\"\"\"\n", 50 | "os.environ['install_requirements']='True'" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "id": "5ebf4468-2a07-4938-a090-704770bdb762", 57 | "metadata": { 58 | "scrolled": true, 59 | "tags": [] 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "if bool(os.environ.get('create_image',False)):\n", 64 | " docker_file=\"\"\"\n", 65 | " FROM registry.access.redhat.com/ubi8/python-39\n", 66 | " RUN pip install ipython nbformat opencv-python\n", 67 | " ADD image-endpoint.ipynb /\n", 68 | " ENTRYPOINT [\"ipython\",\"/image-endpoint.ipynb\",\"> /tmp/component.log\",\"2> /tmp/component.err\"]\n", 69 | " \"\"\"\n", 70 | " with open(\"Dockerfile\", \"w\") as text_file:\n", 71 | " text_file.write(docker_file)\n", 72 | "\n", 73 | " !docker build -t claimed-predict-image-endpoint:`echo $version` .\n", 74 | " !docker tag claimed-predict-image-endpoint:`echo $version` `echo $repository`/claimed-predict-image-endpoint:`echo $version`\n", 75 | " !docker push `echo $repository`/claimed-predict-image-endpoint:`echo $version`\n", 76 | "elif bool(os.environ.get('install_requirements',False)):\n", 77 | " !pip install opencv-python" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "id": "054a358d", 84 | "metadata": { 85 | "papermill": { 86 | "duration": 0.02608, 87 | "end_time": "2022-01-10T17:05:21.005692", 88 | "exception": false, 89 | "start_time": "2022-01-10T17:05:20.979612", 90 | "status": "completed" 91 | }, 92 | "tags": [] 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "import cv2" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "id": "cca6e990-40d8-439d-9205-bcfaf2ce82b2", 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "from __future__ import print_function\n", 107 | "import requests\n", 108 | "import json\n", 109 | "import cv2\n", 110 | "\n", 111 | "vid = cv2.VideoCapture(0)\n", 112 | "\n", 113 | "while(True):\n", 114 | " \n", 115 | " # Capture the video frame\n", 116 | " # by frame\n", 117 | " ret, frame = vid.read()\n", 118 | " if not ret:\n", 119 | " print('err')\n", 120 | " break\n", 121 | " \n", 122 | " _, img_encoded = cv2.imencode('.png', frame)\n", 123 | " \n", 124 | " content_type = 'multipart/form-data'\n", 125 | " headers = {'content-type': content_type}\n", 126 | " \n", 127 | " response = requests.post('http://127.0.0.1:8080/process-labels', data=img_encoded.tostring(), headers=headers)\n", 128 | "# decode response\n", 129 | " print(response.text)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "id": "cd0b2746-85c6-4748-b290-c76f30adf248", 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "vid.release()\n" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "id": "b595c0b6-4722-4dd9-8c26-8a5f8df24601", 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "vid = cv2.VideoCapture(0)\n", 150 | " \n", 151 | "while(True):\n", 152 | " \n", 153 | " # Capture the video frame\n", 154 | " # by frame\n", 155 | " ret, frame = 
vid.read()\n", 156 | " \n", 157 | " # Display the resulting frame\n", 158 | " cv2.imshow('frame', frame)\n", 159 | " \n", 160 | " # the 'q' button is set as the\n", 161 | " # quitting button you may use any\n", 162 | " # desired button of your choice\n", 163 | " if cv2.waitKey(1) & 0xFF == ord('q'):\n", 164 | " break\n", 165 | " \n", 166 | "# After the loop release the cap object\n", 167 | "vid.release()\n", 168 | "# Destroy all the windows\n", 169 | "cv2.destroyAllWindows()" 170 | ] 171 | } 172 | ], 173 | "metadata": { 174 | "kernelspec": { 175 | "display_name": "Python 3 (ipykernel)", 176 | "language": "python", 177 | "name": "python3" 178 | }, 179 | "language_info": { 180 | "codemirror_mode": { 181 | "name": "ipython", 182 | "version": 3 183 | }, 184 | "file_extension": ".py", 185 | "mimetype": "text/x-python", 186 | "name": "python", 187 | "nbconvert_exporter": "python", 188 | "pygments_lexer": "ipython3", 189 | "version": "3.9.13" 190 | }, 191 | "papermill": { 192 | "default_parameters": {}, 193 | "duration": 41.725932, 194 | "end_time": "2022-01-10T17:05:59.665500", 195 | "environment_variables": {}, 196 | "exception": null, 197 | "input_path": "/home/romeokienzler/gitco/claimed/component-library/input/input-url.ipynb", 198 | "output_path": "/home/romeokienzler/gitco/claimed/component-library/input/input-url.ipynb", 199 | "parameters": {}, 200 | "start_time": "2022-01-10T17:05:17.939568", 201 | "version": "2.3.3" 202 | } 203 | }, 204 | "nbformat": 4, 205 | "nbformat_minor": 5 206 | } 207 | -------------------------------------------------------------------------------- /component-library/input/ls-cos.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "53af08cf-0149-4369-93d6-fba2203ec6cc", 6 | "metadata": {}, 7 | "source": [ 8 | "# List content of a bucket in Cloud Object Storage COS" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "7790dbc7-325c-49ff-a709-b4c3c8a659b7", 14 | "metadata": {}, 15 | "source": [ 16 | "List content of a bucket in Cloud Object Storage COS" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "id": "05a6d935-38c5-49df-9df2-fbc8c31c4634", 23 | "metadata": { 24 | "scrolled": true, 25 | "tags": [] 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "!pip install aiobotocore botocore s3fs" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "id": "85859764-8fd2-4394-88d6-b3c54194b868", 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import logging\n", 40 | "import os\n", 41 | "import sys\n", 42 | "import re\n", 43 | "import s3fs" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 5, 49 | "id": "a8069e66-fad7-4be9-bc6f-b9eaa119b3bc", 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "access_key_id = os.environ.get('access_key_id')\n", 54 | "secret_access_key = os.environ.get('secret_access_key')\n", 55 | "endpoint = os.environ.get('endpoint')\n", 56 | "path = os.environ.get('path')\n", 57 | "destination_file = os.environ.get('destination_file','file_list.txt')\n", 58 | "data_dir = os.environ.get('data_dir')" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 6, 64 | "id": "ad8bf902-7582-445b-aea1-a9ce869532ee", 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "logger = logging.getLogger()\n", 69 | "logger.setLevel(logging.DEBUG)\n", 70 | "sh = logging.StreamHandler(sys.stdout)\n", 71 | 
"logger.addHandler(sh)\n", 72 | "\n", 73 | "\n", 74 | "parameters = list(\n", 75 | " map(lambda s: re.sub('$', '\"', s),\n", 76 | " map(\n", 77 | " lambda s: s.replace('=', '=\"'),\n", 78 | " filter(\n", 79 | " lambda s: s.find('=') > -1 and bool(re.match(r'[A-Za-z0-9_]*=[.\\/A-Za-z0-9]*', s)),\n", 80 | " sys.argv\n", 81 | " )\n", 82 | " )))\n", 83 | "\n", 84 | "for parameter in parameters:\n", 85 | " logging.info('Parameter: ' + parameter)\n", 86 | " exec(parameter)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 8, 92 | "id": "704117f6-702d-43ee-9092-201c46b31e15", 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "s3 = s3fs.S3FileSystem(\n", 97 | " anon=False,\n", 98 | " key=access_key_id,\n", 99 | " secret=secret_access_key,\n", 100 | " client_kwargs={'endpoint_url': endpoint}\n", 101 | ")\n", 102 | "logger.info('S3 connection established')" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 14, 108 | "id": "2bd96848-eea7-4fc6-b921-d8129a1a1f58", 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "with open(data_dir + destination_file, 'w') as file:\n", 113 | " file.write('\\n'.join(s3.glob(path)))\n", 114 | "logger.info('List obtained')" 115 | ] 116 | } 117 | ], 118 | "metadata": { 119 | "kernelspec": { 120 | "display_name": "Python 3 (ipykernel)", 121 | "language": "python", 122 | "name": "python3" 123 | }, 124 | "language_info": { 125 | "codemirror_mode": { 126 | "name": "ipython", 127 | "version": 3 128 | }, 129 | "file_extension": ".py", 130 | "mimetype": "text/x-python", 131 | "name": "python", 132 | "nbconvert_exporter": "python", 133 | "pygments_lexer": "ipython3", 134 | "version": "3.9.6" 135 | } 136 | }, 137 | "nbformat": 4, 138 | "nbformat_minor": 5 139 | } 140 | -------------------------------------------------------------------------------- /component-library/metric/README.md: -------------------------------------------------------------------------------- 1 | 18 | This folder contains components for computing metrics on data and machine/deep learning models. Besides performance metrics (like accuracy, F1 score, area under ROC, ..) also "TrustedAI" metrics on adversarial robustness, fairness and explainability are supported. 
19 | -------------------------------------------------------------------------------- /component-library/metric/metric-aif360.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/metric/metric-aif360.yaml -------------------------------------------------------------------------------- /component-library/metric/metric-aix360-lime.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/metric/metric-aix360-lime.yaml -------------------------------------------------------------------------------- /component-library/metric/metric-confusion-matrix.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/metric/metric-confusion-matrix.yaml -------------------------------------------------------------------------------- /component-library/monitoring/README.md: -------------------------------------------------------------------------------- 1 | 18 | This folder contains components for (continuous) (model) performance monitoring and related tasks (like email notification). 19 | -------------------------------------------------------------------------------- /component-library/monitoring/notification-email.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "varying-cathedral", 6 | "metadata": {}, 7 | "source": [ 8 | "# Email Notification\n", 9 | "Sends an email notification\n", 10 | "\n", 11 | "Note: This operator is currently non-functional. 
\n", 12 | "\n", 13 | "Todos \n", 14 | "\n", 15 | "[ ] add blessing file read and condition functionality \n", 16 | "[ ] add email send attachement functionality (to send blessing outcome as attachment) " 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "id": "unknown-violation", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import smtplib\n", 27 | "import ssl" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "id": "headed-valentine", 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "# @dependency codait_utils.ipynb\n", 38 | "# @dependency metadata\n", 39 | "# @dependancy blessing_outcome\n", 40 | "# (file containing TRUE or FALSE in the first line)\n", 41 | "# @param blessing_filename" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "satisfactory-bruce", 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "import os\n", 52 | "port = os.environ.get('port', 465)\n", 53 | "password = os.environ.get('password')\n", 54 | "server = os.environ.get('server', 'smtp.gmail.com')\n", 55 | "user = os.environ.get('user')" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "id": "recognized-kingston", 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "skip = True # component not yet implemented completely\n", 66 | "\n", 67 | "if not skip:\n", 68 | "\n", 69 | " port = 465 # For SSL\n", 70 | " password = input(\"Type your password and press enter: \")\n", 71 | "\n", 72 | " # Create a secure SSL context\n", 73 | " context = ssl.create_default_context()\n", 74 | "\n", 75 | " sender_email, receiver_email, message = ('dummy', 'dummy', 'dummy')\n", 76 | "\n", 77 | " with smtplib.SMTP_SSL(server, port, context=context) as server:\n", 78 | " server.login(user, password)\n", 79 | " server.sendmail(sender_email, receiver_email, message)" 80 | ] 81 | } 82 | ], 83 | "metadata": { 84 | "kernelspec": { 85 | "display_name": "Python 3", 86 | "language": "python", 87 | "name": "python3" 88 | }, 89 | "language_info": { 90 | "codemirror_mode": { 91 | "name": "ipython", 92 | "version": 3 93 | }, 94 | "file_extension": ".py", 95 | "mimetype": "text/x-python", 96 | "name": "python", 97 | "nbconvert_exporter": "python", 98 | "pygments_lexer": "ipython3", 99 | "version": "3.8.6" 100 | } 101 | }, 102 | "nbformat": 4, 103 | "nbformat_minor": 5 104 | } 105 | -------------------------------------------------------------------------------- /component-library/monitoring/notification-email.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/monitoring/notification-email.yaml -------------------------------------------------------------------------------- /component-library/nlp/nlp-classify-text-simple.yaml: -------------------------------------------------------------------------------- 1 | name: nlp-transform-snippets 2 | description: creates snippets out of large text files 3 | 4 | inputs: 5 | - {name: data_dir, type: String, description: 'temporal data storage for local execution'} 6 | 7 | 8 | outputs: 9 | - {name: output_model_zip, type: String, description: 'resulting model zip file name'} 10 | 11 | 12 | implementation: 13 | container: 14 | image: romeokienzler/claimed-nlp-transform-snippets:0.1 15 | command: 16 | - sh 17 | - -ec 18 | - | 19 | python 
./nlp-classify-text-simple.py output_model_zip="$0" data_dir="$1" 20 | - {outputPath: output_model_zip} 21 | - {inputValue: data_dir} 22 | -------------------------------------------------------------------------------- /component-library/output/output-rdbms-sqlalchemy.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/output/output-rdbms-sqlalchemy.yaml -------------------------------------------------------------------------------- /component-library/output/upload-to-cos-http-adapter.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "53af08cf-0149-4369-93d6-fba2203ec6cc", 6 | "metadata": {}, 7 | "source": [ 8 | "# Upload to Cloud Object Storage COS HTTP adapter\n", 9 | "\n", 10 | "Create a HTTP service to upload a file to any S3 compliant Cloud Object Storage" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "id": "9ee25986-e86d-4995-9a1d-0e2ea95e35ab", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import os\n", 21 | "#os.environ['create_image']='True'" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 3, 27 | "id": "38ee13e4-dd59-4b52-982a-ea5ba3f6594e", 28 | "metadata": { 29 | "scrolled": true, 30 | "tags": [] 31 | }, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "Sending build context to Docker daemon 39.94kB\n", 38 | "Step 1/5 : FROM registry.access.redhat.com/ubi8/python-39\n", 39 | " ---> a531ae76755e\n", 40 | "Step 2/5 : RUN pip install ipython nbformat aiobotocore botocore s3fs flask\n", 41 | " ---> Using cache\n", 42 | " ---> f349e56a05a3\n", 43 | "Step 3/5 : ADD upload-to-cos.ipynb /\n", 44 | " ---> 8a007fd5de44\n", 45 | "Step 4/5 : ADD upload-to-cos-http-adapter.ipynb /\n", 46 | " ---> cf63d3c9918f\n", 47 | "Step 5/5 : ENTRYPOINT [\"ipython\",\"/upload-to-cos-http-adapter.ipynb\"]\n", 48 | " ---> Running in 276421fbdf6a\n", 49 | "Removing intermediate container 276421fbdf6a\n", 50 | " ---> 30367437d548\n", 51 | "Successfully built 30367437d548\n", 52 | "Successfully tagged upload-to-cos-http-adapter:latest\n", 53 | "Using default tag: latest\n", 54 | "The push refers to repository [docker.io/romeokienzler/upload-to-cos-http-adapter]\n", 55 | "\n", 56 | "\u001b[1B9fa60caf: Preparing \n", 57 | "\u001b[1Ba6c2215a: Preparing \n", 58 | "\u001b[1Bb4330ba2: Preparing \n", 59 | "\u001b[1B2e0f4ef5: Preparing \n", 60 | "\u001b[1B276847a2: Preparing \n", 61 | "\u001b[1B534f4e1b: Preparing \n", 62 | "\u001b[1Bc926eef9: Preparing \n", 63 | "\u001b[8B9fa60caf: Pushed lready exists 4kB\u001b[5A\u001b[2K\u001b[4A\u001b[2K\u001b[3A\u001b[2K\u001b[1A\u001b[2K\u001b[7A\u001b[2K\u001b[8A\u001b[2Klatest: digest: sha256:4ece79573cbbb8988f04fd5a10ada81b5f1db70590bced8eb29f43616f76cad4 size: 2002\n" 64 | ] 65 | } 66 | ], 67 | "source": [ 68 | "if bool(os.environ.get('create_image',False)):\n", 69 | " docker_file=\"\"\"\n", 70 | " FROM registry.access.redhat.com/ubi8/python-39\n", 71 | " RUN pip install ipython nbformat aiobotocore botocore s3fs flask\n", 72 | " ADD upload-to-cos.ipynb /\n", 73 | " ADD upload-to-cos-http-adapter.ipynb /\n", 74 | " ENTRYPOINT [\"ipython\",\"/upload-to-cos-http-adapter.ipynb\"]\n", 75 | " \"\"\"\n", 76 | " with open(\"Dockerfile\", \"w\") as text_file:\n", 77 | " 
text_file.write(docker_file)\n", 78 | "\n", 79 | " !docker build -t upload-to-cos-http-adapter .\n", 80 | " !docker tag upload-to-cos-http-adapter romeokienzler/upload-to-cos-http-adapter\n", 81 | " !docker push romeokienzler/upload-to-cos-http-adapter\n", 82 | "elif bool(os.environ.get('install_dependencies',False)):\n", 83 | " !pip install aiobotocore botocore s3fs" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "id": "ff2f0a49-bdff-4271-90d5-1735a0c93489", 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "from flask import request\n", 94 | "from flask import Flask\n", 95 | "import time\n", 96 | "import json" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "id": "60436d8a-f461-4723-abeb-cc22b555c782", 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "app = Flask(__name__)\n", 107 | "\n", 108 | "@app.route('/', methods=['POST'])\n", 109 | "def index():\n", 110 | " message = json.dumps(request.get_json())\n", 111 | " print(message)\n", 112 | " now = time.time()\n", 113 | " print(now)\n", 114 | " with open('/tmp/source.json','w') as file:\n", 115 | " file.write(message)\n", 116 | " !ipython /upload-to-cos.ipynb source_file=\"source.json\" destination_file=$now\".json\"\n", 117 | " return ''\n", 118 | "\n", 119 | "app.run(host='0.0.0.0', port=8080)" 120 | ] 121 | } 122 | ], 123 | "metadata": { 124 | "kernelspec": { 125 | "display_name": "Python 3 (ipykernel)", 126 | "language": "python", 127 | "name": "python3" 128 | }, 129 | "language_info": { 130 | "codemirror_mode": { 131 | "name": "ipython", 132 | "version": 3 133 | }, 134 | "file_extension": ".py", 135 | "mimetype": "text/x-python", 136 | "name": "python", 137 | "nbconvert_exporter": "python", 138 | "pygments_lexer": "ipython3", 139 | "version": "3.9.6" 140 | } 141 | }, 142 | "nbformat": 4, 143 | "nbformat_minor": 5 144 | } 145 | -------------------------------------------------------------------------------- /component-library/output/upload-to-cos.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.2 2 | class: CommandLineTool 3 | 4 | baseCommand: "claimed" 5 | 6 | inputs: 7 | component: 8 | type: string 9 | default: romeokienzler/claimed-upload-to-cos:0.8 10 | inputBinding: 11 | position: 1 12 | prefix: --component 13 | log_level: 14 | type: string 15 | default: "INFO" 16 | inputBinding: 17 | position: 2 18 | prefix: --log_level 19 | target: 20 | type: string 21 | default: None 22 | inputBinding: 23 | position: 3 24 | prefix: --target 25 | source_file_pattern: 26 | type: string 27 | default: None 28 | inputBinding: 29 | position: 4 30 | prefix: --source_file_pattern 31 | find_recursive: 32 | type: bool 33 | default: True 34 | inputBinding: 35 | position: 5 36 | prefix: --find_recursive 37 | process_target_file_pattern: 38 | type: string 39 | default: None 40 | inputBinding: 41 | position: 6 42 | prefix: --process_target_file_pattern 43 | 44 | 45 | outputs: [] 46 | -------------------------------------------------------------------------------- /component-library/output/upload-to-cos.job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: upload-to-cos 5 | spec: 6 | template: 7 | spec: 8 | containers: 9 | - name: upload-to-cos 10 | image: romeokienzler/claimed-upload-to-cos:0.8 11 | workingDir: /opt/app-root/src/ 12 | command: 
["/opt/app-root/bin/ipython","claimed_upload-to-cos.ipynb"] 13 | env: 14 | - name: log_level 15 | value: value_of_log_level 16 | - name: target 17 | value: value_of_target 18 | - name: source_file_pattern 19 | value: value_of_source_file_pattern 20 | - name: find_recursive 21 | value: value_of_find_recursive 22 | - name: process_target_file_pattern 23 | value: value_of_process_target_file_pattern 24 | restartPolicy: OnFailure 25 | imagePullSecrets: 26 | - name: image_pull_secret -------------------------------------------------------------------------------- /component-library/output/upload-to-cos.yaml: -------------------------------------------------------------------------------- 1 | name: process_target_file_pattern 2 | description: "# output-upload-to-cos Uploads a file to any S3 compliant Cloud Object Storage – CLAIMED V0.1" 3 | 4 | inputs: 5 | - {name: log_level, type: String, description: "update log level", default: "INFO"} 6 | - {name: target, type: String, description: "target in format: cos://access_key_id:secret_access_key@endpoint/bucket/path"} 7 | - {name: source_file_pattern, type: String, description: "source folder and file pattern (glob)"} 8 | - {name: find_recursive, type: Boolean, description: "find_recursive, if True, will search for files in subfolders specified in source_file_pattern. Default is True", default: "True'"} 9 | - {name: process_target_file_pattern, type: String, description: "process source file path on target using regex. Default is None", default: "None"} 10 | 11 | 12 | outputs: 13 | 14 | 15 | implementation: 16 | container: 17 | image: romeokienzler/claimed-process_target_file_pattern:0.8 18 | command: 19 | - sh 20 | - -ec 21 | - | 22 | ipython ./claimed_upload-to-cos.ipynb log_level="${0}" target="${1}" source_file_pattern="${2}" find_recursive="${3}" process_target_file_pattern="${4}" 23 | - {inputValue: log_level} 24 | - {inputValue: target} 25 | - {inputValue: source_file_pattern} 26 | - {inputValue: find_recursive} 27 | - {inputValue: process_target_file_pattern} 28 | -------------------------------------------------------------------------------- /component-library/predict/README.md: -------------------------------------------------------------------------------- 1 | 18 | This folder contains components taking a trained model object and apply it to input data in order to add predictions (as a column) to the input or meta dataset. 
19 | -------------------------------------------------------------------------------- /component-library/predict/predict-images.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Predict Images\n", 8 | "Given a model and images the model is applied and the result is added to the metadata" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "!pip3 install tensorflow==2.4.0 wget==3.2" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import wget\n", 27 | "wget.download(\n", 28 | " 'https://raw.githubusercontent.com/IBM/claimed/master/component-library/claimed_utils.py'\n", 29 | ")\n", 30 | "from claimed_utils import parse_args_to_parameters\n", 31 | "import tensorflow as tf\n", 32 | "from tensorflow import keras\n", 33 | "from claimed_utils import unzip\n", 34 | "import os\n", 35 | "import os.path\n", 36 | "import glob\n", 37 | "import pandas as pd\n", 38 | "import numpy as np" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# Column name containing the target/prediction value (the real measured value)\n", 48 | "target_column = os.environ.get('target_column', 'target')\n", 49 | "\n", 50 | "# Column name containing the image file name\n", 51 | "image_column = os.environ.get('image_column', 'filename')\n", 52 | "\n", 53 | "# Target image shape (the model expects)\n", 54 | "image_shape = os.environ.get('image_shape', '400,400')\n", 55 | "\n", 56 | "# Column name under which the prediction has to be stored\n", 57 | "prediction_column = os.environ.get('prediction_column', 'prediction')\n", 58 | "\n", 59 | "# data CSV file containing meta information about the images (e.g. 
real class, ...)\n", 60 | "metadata = os.environ.get('metadata', 'metadata.csv')\n", 61 | "\n", 62 | "# data CSV file containing updated meta information about the images with \"prediction_column\" added\n", 63 | "output_metadata = os.environ.get('output_metadata', 'metadata.csv')\n", 64 | "\n", 65 | "# model zip file name - currently only zipped TensorFlow 2.x pb files supported\n", 66 | "model_zip = os.environ.get('model_zip', 'model.zip')\n", 67 | "\n", 68 | "# zip file name containing all the images\n", 69 | "data_zip = os.environ.get('data_zip', 'data.zip')\n", 70 | "\n", 71 | "parse_args_to_parameters()" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "unzip('.', model_zip)\n", 81 | "unzip('.', data_zip)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "model = keras.models.load_model('model')" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "classes = glob.glob(\"data/*\")\n", 100 | "classes = map(lambda s: s.split('/')[1], classes)\n", 101 | "classes = list(classes)\n", 102 | "classes" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "df = pd.read_csv(metadata)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "df" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "dataset = []\n", 130 | "\n", 131 | "for i, row in df.iterrows():\n", 132 | " target_size = 'dummy' # make the compiler happy\n", 133 | " exec('target_size = (' + image_shape + ')')\n", 134 | " image = tf.keras.preprocessing.image.load_img(\n", 135 | " 'data/' + row[target_column] + '/' + row[image_column],\n", 136 | " target_size=target_size\n", 137 | " )\n", 138 | " input_arr = keras.preprocessing.image.img_to_array(image)\n", 139 | " input_arr = np.array([input_arr]) # Convert single image to a batch.\n", 140 | " predictions = model.predict(input_arr)\n", 141 | " prediction = predictions[0]\n", 142 | " prediction = classes[np.argmax(prediction)]\n", 143 | " dataset.append(prediction)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "dataset = np.array(dataset)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "df = pd.read_csv(metadata)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "df[prediction_column] = dataset.tolist()" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "df" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "df.to_csv(output_metadata, index=False)" 189 | ] 190 | } 191 | ], 192 | "metadata": { 193 | "kernelspec": { 194 | "display_name": "Python 3 (ipykernel)", 195 | "language": "python", 196 | "name": "python3" 197 | }, 198 | 
"language_info": { 199 | "codemirror_mode": { 200 | "name": "ipython", 201 | "version": 3 202 | }, 203 | "file_extension": ".py", 204 | "mimetype": "text/x-python", 205 | "name": "python", 206 | "nbconvert_exporter": "python", 207 | "pygments_lexer": "ipython3", 208 | "version": "3.8.10" 209 | } 210 | }, 211 | "nbformat": 4, 212 | "nbformat_minor": 4 213 | } 214 | -------------------------------------------------------------------------------- /component-library/predict/predict-images.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/predict/predict-images.yaml -------------------------------------------------------------------------------- /component-library/run_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import subprocess 3 | 4 | 5 | class TestClaimed(unittest.TestCase): 6 | 7 | def test_ibm_sql_query_cpd_test(self): 8 | correct = subprocess.run(['ipython', './transform/ibm-sql-query-cpd-test.ipynb'], check=True, text=True) 9 | self.assertFalse('Foo'.isupper()) 10 | 11 | unittest.main() 12 | -------------------------------------------------------------------------------- /component-library/segment-anything/generate-masks.yaml: -------------------------------------------------------------------------------- 1 | name: Segment-Anything 2 | description: Generate mask for an given image. 3 | CLAIMED v$version 4 | 5 | inputs: 6 | - {name: model_type, type: String, description: 'model type'} 7 | - {name: checkpoint_path, type: String, description: 'different model type requires different check point'} 8 | - {name: input_image_path, type: String, description: 'input image'} 9 | - {name: data_dir, type: String, description: 'temporal data storage for local execution'} 10 | 11 | 12 | outputs: 13 | 14 | 15 | implementation: 16 | container: 17 | image: romeokienzler/claimed-Segment-Anything:$version 18 | command: 19 | - sh 20 | - -ec 21 | - | 22 | ipython ./generate-masks.ipynb model_type="$0" checkpoint_path="$1" input_image_path="$2" data_dir="$3" 23 | - {outputPath: None} 24 | - {inputValue: model_type} 25 | - {inputValue: checkpoint_path} 26 | - {inputValue: input_image_path} 27 | - {inputValue: data_dir} 28 | -------------------------------------------------------------------------------- /component-library/segment-anything/get-masks.yaml: -------------------------------------------------------------------------------- 1 | name: Segment-Anything 2 | description: Get masks from an input image with input prompt. 3 | CLAIMED v$version 4 | 5 | inputs: 6 | - {name: model_type, type: String, description: 'model type'} 7 | - {name: checkpoint_path, type: String, description: 'different model type requires different check point'} 8 | - {name: input_image_path, type: String, description: 'input image'} 9 | - {name: input_array, type: String, description: 'x,y,x,y,x,y... for points. Multiple allowed'} 10 | - {name: input_box, type: String, description: 'x,y,x,y for box. 
Only one box allowed'} 11 | - {name: data_dir, type: String, description: 'temporal data storage for local execution'} 12 | 13 | 14 | outputs: 15 | 16 | 17 | implementation: 18 | container: 19 | image: romeokienzler/claimed-Segment-Anything:$version 20 | command: 21 | - sh 22 | - -ec 23 | - | 24 | ipython ./get-masks.ipynb model_type="$0" checkpoint_path="$1" input_image_path="$2" input_array="$3" input_box="$4" data_dir="$5" 25 | - {outputPath: None} 26 | - {inputValue: model_type} 27 | - {inputValue: checkpoint_path} 28 | - {inputValue: input_image_path} 29 | - {inputValue: input_array} 30 | - {inputValue: input_box} 31 | - {inputValue: data_dir} 32 | -------------------------------------------------------------------------------- /component-library/sim/wrf.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "956e75e1", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import os\n", 11 | "\n", 12 | "os.environ['create_image']='True'\n", 13 | "os.environ['repository']='docker.io/romeokienzler'\n", 14 | "os.environ['version']='0.1'\n", 15 | "os.environ['name']='claimed-sim-wrf'" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "id": "cc70e77c", 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "Sending build context to Docker daemon 25.6kB\n", 29 | "Step 1/8 : FROM romeokienzler/mywrfcontainerimage\n", 30 | " ---> 7cf57e8f908b\n", 31 | "Step 2/8 : WORKDIR /wrf/WRF\n", 32 | " ---> Using cache\n", 33 | " ---> 96d7a5ddcc69\n", 34 | "Step 3/8 : RUN ./clean -a\n", 35 | " ---> Using cache\n", 36 | " ---> bb1a1805bf3a\n", 37 | "Step 4/8 : RUN ./configure\n", 38 | " ---> Running in dcd9136a53bd\n", 39 | "checking for perl5... no\n", 40 | "checking for perl... found /usr/bin/perl (perl)\n", 41 | " \n", 42 | "*****************************************************************************\n", 43 | "No environment variable NETCDF set.\n", 44 | "Stopping\n", 45 | "*****************************************************************************\n", 46 | " \n", 47 | "The command '/bin/sh -c ./configure' returned a non-zero code: 5\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "if bool(os.environ.get('create_image',False)):\n", 53 | " docker_file=\"\"\"\n", 54 | " FROM romeokienzler/mywrfcontainerimage\n", 55 | " SHELL [\"/bin/tcsh\", \"-c\"]\n", 56 | " WORKDIR /wrf/WRF\n", 57 | " RUN ./clean -a\n", 58 | " RUN ./configure\n", 59 | " RUN ./compile em_real >&! foo\n", 60 | " WORKDIR /wrf/WPS\n", 61 | " RUN ./configure\n", 62 | " RUN ./compile >&! foo\n", 63 | " \"\"\"\n", 64 | " with open(\"Dockerfile\", \"w\") as text_file:\n", 65 | " text_file.write(docker_file)\n", 66 | "\n", 67 | " !docker build -t `echo $name`:`echo $version` .\n", 68 | " #!docker tag `echo $name`:`echo $version` `echo $repository`/`echo $name`:`echo $version`\n", 69 | " #!docker push `echo $repository`/`echo $name`:`echo $version`\n", 70 | " !rm Dockerfile\n", 71 | "else: \n", 72 | " None" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "id": "57ab222c-9f77-495a-8f5a-080138c58f34", 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "#! /bin/sh\n", 83 | "ls\n", 84 | "docker run -it --name teachme 7cf57e8f908b /bin/tcsh\n", 85 | "ls -ls\n", 86 | "cd WRF\n", 87 | "./clean -a\n", 88 | "./configure\n", 89 | "./compile em_real >&! 
foo\n", 90 | "ls -ls main/*.exe\n", 91 | "cd ../WPS\n", 92 | "./configure\n", 93 | "./compile >&! foo\n", 94 | "ls -ls *.exe\n", 95 | "cp namelist.wps namelist.wps.original\n", 96 | "cp /wrf/wrfinput/namelist.wps.docker namelist.wps\n", 97 | "./geogrid.exe\n", 98 | "ls -ls geo_em.d01.nc\n", 99 | "./link_grib.csh /wrf/wrfinput/fnl\n", 100 | "cp ungrib/Variable_Tables/Vtable.GFS Vtable\n", 101 | "./ungrib.exe\n", 102 | "ls -ls FILE*\n", 103 | "./metgrid.exe\n", 104 | "ls -ls met_em.*\n", 105 | "cd ../WRF/test/em_real\n", 106 | "ln -sf ../../../WPS/met_em* .\n", 107 | "cp namelist.input namelist.input.original\n", 108 | "cp /wrf/wrfinput/namelist.input.docker namelist.input\n", 109 | "mpirun -np 2 ./real.exe\n", 110 | "tail rsl.out.0000\n", 111 | "ls -ls wrfinput_d01 wrfbdy_d01\n", 112 | "mpirun -np 3 ./wrf.exe &\n", 113 | "tail rsl.out.0000\n", 114 | "ls -ls wrfo*" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "id": "a7e82c13", 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [] 124 | } 125 | ], 126 | "metadata": { 127 | "kernelspec": { 128 | "display_name": "Python 3 (ipykernel)", 129 | "language": "python", 130 | "name": "python3" 131 | }, 132 | "language_info": { 133 | "codemirror_mode": { 134 | "name": "ipython", 135 | "version": 3 136 | }, 137 | "file_extension": ".py", 138 | "mimetype": "text/x-python", 139 | "name": "python", 140 | "nbconvert_exporter": "python", 141 | "pygments_lexer": "ipython3", 142 | "version": "3.11.1" 143 | } 144 | }, 145 | "nbformat": 4, 146 | "nbformat_minor": 5 147 | } 148 | -------------------------------------------------------------------------------- /component-library/sim/wrf.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Pod 4 | metadata: 5 | name: wrf 6 | labels: 7 | app: wrf 8 | spec: 9 | containers: 10 | - name: wrf 11 | image: docker.io/romeokienzler/mywrfcontainerimage 12 | command: ["sleep", "infinity"] -------------------------------------------------------------------------------- /component-library/train/README.md: -------------------------------------------------------------------------------- 1 | 18 | This folder contains components responsible for training machine/deep learning models given input data and a set of configuration parameters. 19 | -------------------------------------------------------------------------------- /component-library/train/spark-train-lr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/train/spark-train-lr.yaml -------------------------------------------------------------------------------- /component-library/train/train-mobilenet_v2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/train/train-mobilenet_v2.yaml -------------------------------------------------------------------------------- /component-library/transform/README.md: -------------------------------------------------------------------------------- 1 | 18 | This folder contains components for transforming data. 19 | 20 | In case of structured data, everything which can be done by applying a function on columns or their aggregates falls under this category. 
21 | 22 | In case of image data, tasks like reshaping, converting or changing the underlying folder structure of the images falls under this category. 23 | -------------------------------------------------------------------------------- /component-library/transform/cloud-object-store-housekeeping.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "53af08cf-0149-4369-93d6-fba2203ec6cc", 6 | "metadata": {}, 7 | "source": [ 8 | "# cloud-object-store-housekeeping" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "c8e13f0c-0f46-4ffa-982e-620cb2c5ad82", 14 | "metadata": {}, 15 | "source": [ 16 | "Housekeeping (delete, list, ...) on any S3 compliant Cloud Object Storage" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "id": "05a6d935-38c5-49df-9df2-fbc8c31c4634", 23 | "metadata": { 24 | "scrolled": true, 25 | "tags": [] 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "!pip install aiobotocore==2.0.1\n", 30 | "!pip install botocore==1.22.8\n", 31 | "!pip install s3fs==2021.11.1" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "id": "85859764-8fd2-4394-88d6-b3c54194b868", 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "import logging\n", 42 | "import os\n", 43 | "import sys\n", 44 | "import re\n", 45 | "import s3fs" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "id": "a8069e66-fad7-4be9-bc6f-b9eaa119b3bc", 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "access_key_id = os.environ.get('access_key_id')\n", 56 | "secret_access_key = os.environ.get('secret_access_key')\n", 57 | "endpoint = os.environ.get('endpoint')\n", 58 | "bucket_name = os.environ.get('bucket_name')\n", 59 | "\n", 60 | "# default: ls, operation in rm (delete), ls (list), walk (walk the tree)\n", 61 | "operation = os.environ.get('operation', 'ls')\n", 62 | "\n", 63 | "# default: sysout, in \"ls\", \"walk\" operation, return list on sysout or as file (provide file name instead of sysout)\n", 64 | "return_mode = os.environ.get('return_mode', 'sysout')\n", 65 | "\n", 66 | "# defaul: empty string, file/folder to operate on\n", 67 | "path = os.environ.get('path', \"\")" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "id": "ad8bf902-7582-445b-aea1-a9ce869532ee", 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "parameters = list(\n", 78 | " map(lambda s: re.sub('$', '\"', s),\n", 79 | " map(\n", 80 | " lambda s: s.replace('=', '=\"'),\n", 81 | " filter(\n", 82 | " lambda s: s.find('=') > -1 and bool(re.match(r'[A-Za-z0-9_]*=[.\\/A-Za-z0-9]*', s)),\n", 83 | " sys.argv\n", 84 | " )\n", 85 | " )))\n", 86 | "\n", 87 | "for parameter in parameters:\n", 88 | " logging.warning('Parameter: ' + parameter)\n", 89 | " exec(parameter)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "id": "704117f6-702d-43ee-9092-201c46b31e15", 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "s3 = s3fs.S3FileSystem(\n", 100 | " anon=False,\n", 101 | " key=access_key_id,\n", 102 | " secret=secret_access_key,\n", 103 | " client_kwargs={'endpoint_url': endpoint}\n", 104 | ")" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "id": "60436d8a-f461-4723-abeb-cc22b555c782", 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "if operation == 'ls':\n", 115 | " result_list 
= s3.ls(bucket_name)\n", 116 | " if return_mode == 'sysout':\n", 117 | " print(result_list)\n", 118 | "elif operation == 'walk':\n", 119 | " result_list = s3.walk(bucket_name + '/' + path, maxdepth=100)\n", 120 | " if return_mode == 'sysout':\n", 121 | " for item in result_list:\n", 122 | " print(item)\n", 123 | "elif operation == 'rm':\n", 124 | " s3.rm(bucket_name + '/' + path, recursive=True, maxdepth=100)\n", 125 | "else:\n", 126 | " raise ValueError('operation unknown: {}'.format(operation))" 127 | ] 128 | } 129 | ], 130 | "metadata": { 131 | "kernelspec": { 132 | "display_name": "Python 3 (ipykernel)", 133 | "language": "python", 134 | "name": "python3" 135 | }, 136 | "language_info": { 137 | "codemirror_mode": { 138 | "name": "ipython", 139 | "version": 3 140 | }, 141 | "file_extension": ".py", 142 | "mimetype": "text/x-python", 143 | "name": "python", 144 | "nbconvert_exporter": "python", 145 | "pygments_lexer": "ipython3", 146 | "version": "3.9.6" 147 | } 148 | }, 149 | "nbformat": 4, 150 | "nbformat_minor": 5 151 | } 152 | -------------------------------------------------------------------------------- /component-library/transform/cpdconfig.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | - name: watson-studio 3 | type: watson-studio 4 | service: 5 | url: "https://api.dataplatform.dev.cloud.ibm.com" -------------------------------------------------------------------------------- /component-library/transform/ibm-sql-query-cpd-manual.yaml: -------------------------------------------------------------------------------- 1 | name: Data Engine (SQL) 2 | description: Run arbitrary SQL based data transformation jobs on CSV, PARQUET, JSON, AVRO and ORC data stored on Cloud Object Storage using IBM Data Engine. 
Transformation results are stored back to Cloud Object Storage 3 | 4 | metadata: 5 | annotations: 6 | platform: IBM Cloud Pak for Data 7 | component_group: run 8 | component_type: data-engine 9 | component_version: '1' 10 | component_type_name: IBM Data Engine 11 | entitlements: data-engine 12 | feature_flag: data-engine 13 | 14 | inputs: 15 | - name: data_engine_crn 16 | type: 17 | InstanceCRN: 18 | service_name: sql-query 19 | optional: 20 | description: (unique) Custom Resource Name (CRN) of IBM Data Engine Service 21 | annotations: 22 | human_name: Data Engine CRN 23 | - name: sql 24 | type: String 25 | description: sql statement to execute 26 | annotations: 27 | input_type: textarea 28 | human_name: SQL Query 29 | - name: target_dir_path 30 | type: 31 | CPDPath: 32 | path_type: file 33 | datasource_type: bluemixcloudobjectstorage 34 | description: COS URL where the results of the SQL job are to be stored 35 | annotations: 36 | human_name: Result COS URL 37 | allowed_scopes: 38 | - space 39 | - project 40 | - {name: target_asset_name, type: String, description: 'Asset name to register for the results written by the SQL job', annotations: {human_name: Target Asset Name}} 41 | - name: format 42 | description: Output file format, default CSV - (will be generated into according STORED AS … clause in the INTO clause) 43 | annotations: 44 | human_name: Output File Format 45 | type: String 46 | validators: 47 | enum: 48 | - value: csv 49 | human_name: CSV 50 | - value: parquet 51 | human_name: Parquet 52 | - value: avro 53 | human_name: Avro 54 | - value: orc 55 | human_name: ORC 56 | - value: json 57 | human_name: JSON 58 | - {name: partition_columns, type: String, description: 'list of columns to use for partitioning the results of the SQL job, will be generated into according PARTITIONED BY () clause in the INTO clause)', optional: true, annotations: {human_name: Partition Columns}} 59 | - {name: number_of_objects, type: Integer, default: 0, description: 'number of objects to store the results of the SQL job in, will be generated into according PARTITIONED INTO OBJECTS clause in INTO clause', optional: true, annotations: {human_name: Number of Objects}} 60 | - {name: rows_per_object, type: Integer, default: 0, description: 'number of rows to be stored in each result object of the SQL job, will be generated into according PARTITIONED EVERY ROWS clause in INTO clause', optional: true, annotations: {human_name: Rows per Object}} 61 | - {name: exact_name, type: Bool, default: False, description: 'only valid when none of the above partitioning option is specified, produces exactly one object with name specified in target_dir_path, twill be generated into sqlClient.rename_exact_result(jobid) after SQL has run.', annotations: {human_name: Exact Name}} 62 | - {name: no_jobid_folder, type: Bool, default: False, description: 'will be generated into JOBPREFIX NONE in the INTO clause. 
Will cause results of previous runs with same output_uri to be overwritten, because no unique sub folder will be created for the result)', annotations: {human_name: No jobid Folder}} 63 | - {name: token, type: String, default: False, description: 'IBM cloud token to be injected)', annotations: {human_name: IBM Cloud Token}} 64 | 65 | outputs: 66 | - name: data_asset 67 | type: 68 | CPDPath: 69 | path_type: resource 70 | resource_type: asset 71 | asset_type: data_asset 72 | description: output file name containing the CPD path of the resulting asset 73 | annotations: 74 | human_name: Output file name containing the CPD path of the resulting asset 75 | 76 | implementation: 77 | container: 78 | image: us.icr.io/ai-lifecycle-staging/ibm_sql_query_cpd:0.28 79 | command: 80 | - sh 81 | - -ec 82 | - | 83 | ipython ./ibm-sql-query-cpd.ipynb data_asset="$0" target_dir_path="$1" target_asset_name="$2" sql="$3" token="$4" sql_query_crn="$5" format="$6" partition_columns="$7" number_of_objects="$8" rows_per_object="$9" exact_name="$10" no_jobid_folder="$11" 84 | - {outputPath: data_asset} 85 | - {inputValue: target_dir_path} 86 | - {inputValue: target_asset_name} 87 | - {inputValue: sql} 88 | - {inputValue: token} 89 | - {inputValue: data_engine_crn} 90 | - {inputValue: format} 91 | - {inputValue: partition_columns} 92 | - {inputValue: number_of_objects} 93 | - {inputValue: rows_per_object} 94 | - {inputValue: exact_name} 95 | - {inputValue: no_jobid_folder} 96 | -------------------------------------------------------------------------------- /component-library/transform/ibm-sql-query-cpd.yaml: -------------------------------------------------------------------------------- 1 | name: ibm_sql_query_cpd 2 | description: Run arbitrary SQL based data transformation jobs on CSV and PARQUET data stored on Cloud Object Storage using IBM SQL Query. 
Transformation results are written stored back to Cloud Object Storage 3 | 4 | inputs: 5 | - {name: target_dir_path, type: String, description: 'COS URL where the results of the SQL job are to be stored'} 6 | - {name: target_asset_name, type: String, description: 'Asset name to register for the results written by the SQL job'} 7 | - {name: sql, type: String, description: 'sql statement to execute ()'} 8 | - {name: sql_query_crn, type: String, description: '(unique) Custom Resource Name (CRN) of IBM SQL Query Service'} 9 | - {name: format, type: String, description: 'default: CSV - (will be generated into according STORED AS … clause in the INTO clause)'} 10 | - {name: partition_columns, type: String, description: 'optional, list of columns to use for partitioning the results of the SQL job, will be generated into according PARTITIONED BY () clause in the INTO clause)'} 11 | - {name: number_of_objects, type: String, description: 'optional, number of objects to store the results of the SQL job in, will be generated into according PARTITIONED INTO OBJECTS clause in INTO clause'} 12 | - {name: rows_per_object, type: String, description: 'optional, number of rows to be stored in each result object of the SQL job, will be generated into according PARTITIONED EVERY ROWS clause in INTO clause'} 13 | - {name: exact_name, type: String, description: 'default: False, only valid when none of the above partitioning option is specified, produces exactly one object with name specified in target_dir_path, twill be generated into sqlClient.rename_exact_result(jobid) after SQL has run.'} 14 | - {name: no_jobid_folder, type: String, description: 'default: False - will be generated into JOBPREFIX NONE in the INTO clause. Will cause results of previous runs with same output_uri to be overwritten, because no unique sub folder will be created for the result)'} 15 | 16 | 17 | outputs: 18 | - {name: output_file_name, type: String, description: 'default: output.txt - output file name containing the CPD path of the resulting asset'} 19 | 20 | 21 | implementation: 22 | container: 23 | image: continuumio/anaconda3:2020.07 24 | command: 25 | - sh 26 | - -ec 27 | - | 28 | mkdir -p `echo $0 |sed -e 's/\/[a-zA-Z0-9]*$//'` 29 | wget x 30 | ipython y output_file_name="$0" target_dir_path="$1" target_asset_name="$2" sql="$3" sql_query_crn="$4" format="$5" partition_columns="$6" number_of_objects="$7" rows_per_object="$8" exact_name="$9" no_jobid_folder="$10" 31 | - {outputPath: output_file_name} 32 | - {inputValue: target_dir_path} 33 | - {inputValue: target_asset_name} 34 | - {inputValue: sql} 35 | - {inputValue: sql_query_crn} 36 | - {inputValue: format} 37 | - {inputValue: partition_columns} 38 | - {inputValue: number_of_objects} 39 | - {inputValue: rows_per_object} 40 | - {inputValue: exact_name} 41 | - {inputValue: no_jobid_folder} 42 | -------------------------------------------------------------------------------- /component-library/transform/ibm-sql-query.config: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | output_uri=cos://s3.eu-de.cloud-object-storage.appdomain.cloud/claimed-test/result 3 | sql_query_crn=crn:v1:bluemix:public:sql-query:us-south:a/9b13b857a32341b7167255de717172f5:f9dd6c9e-b24b-4506-819e-e038c92339e4:: 4 | sql=SELECT * FROM cos://eu-de/claimed-test/data.parquet stored as parquet 5 | endpoint=https://s3.eu-de.cloud-object-storage.appdomain.cloud 6 | bucket_name=claimed-test 7 | 
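The component descriptions above repeatedly state that format, partition_columns, number_of_objects, rows_per_object and no_jobid_folder are generated into the INTO clause of the submitted SQL. The actual generation happens inside ibm-sql-query.ipynb, which is not shown here; the following is only a hedged sketch of how such a clause could be assembled, using the example values from ibm-sql-query.config above.

# Illustrative sketch only (not the actual ibm-sql-query.ipynb logic) of how the
# documented parameters map onto the INTO clause of a Data Engine / SQL Query job.
def build_into_clause(output_uri, out_format='csv', partition_columns='',
                      number_of_objects=0, rows_per_object=0, no_jobid_folder=False):
    clause = 'INTO {} STORED AS {}'.format(output_uri, out_format.upper())
    if partition_columns:
        clause += ' PARTITIONED BY ({})'.format(partition_columns)
    elif int(number_of_objects) > 0:
        clause += ' PARTITIONED INTO {} OBJECTS'.format(number_of_objects)
    elif int(rows_per_object) > 0:
        clause += ' PARTITIONED EVERY {} ROWS'.format(rows_per_object)
    if no_jobid_folder:
        clause += ' JOBPREFIX NONE'
    # exact_name is not part of the clause; per the descriptions above it is
    # handled after the job via sqlClient.rename_exact_result(jobid).
    return clause

# Example values taken from ibm-sql-query.config above.
sql = ('SELECT * FROM cos://eu-de/claimed-test/data.parquet STORED AS PARQUET '
       + build_into_clause('cos://s3.eu-de.cloud-object-storage.appdomain.cloud/claimed-test/result',
                           out_format='parquet', no_jobid_folder=True))
print(sql)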
-------------------------------------------------------------------------------- /component-library/transform/ibm-sql-query.dockerfile: -------------------------------------------------------------------------------- 1 | FROM registry.access.redhat.com/ubi8/ubi-minimal as base 2 | 3 | ARG UID=2000 4 | ARG GID=2000 5 | 6 | # we install pip and other python packages in /usr/local 7 | ENV PATH="/usr/local/bin:${PATH}" 8 | 9 | RUN microdnf update -y \ 10 | && microdnf install -y --disableplugin=subscription-manager \ 11 | gcc gcc-c++ make.x86_64 openssl-devel bzip2-devel libffi-devel glibc-langpack-en \ 12 | java-1.8.0-openjdk-devel \ 13 | shadow-utils \ 14 | findutils \ 15 | util-linux \ 16 | sqlite-devel \ 17 | python38 python38-setuptools python38-devel \ 18 | && microdnf clean all \ 19 | && pip3 install --upgrade pip 20 | 21 | RUN microdnf update -y \ 22 | && microdnf install -y --disableplugin=subscription-manager \ 23 | git 24 | 25 | RUN microdnf update -y \ 26 | && microdnf install -y --disableplugin=subscription-manager \ 27 | wget curl 28 | 29 | RUN pip install --upgrade ipython 30 | 31 | RUN pip install --upgrade nbformat==5.1.3 -------------------------------------------------------------------------------- /component-library/transform/ibm-sql-query.secrets.template: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | token=aZq..._yfgQ7 3 | api_key=aZq..._yfgQ7 4 | access_key_id=f21...7263cb72 5 | secret_access_key=2b7a5ade...4b84 6 | -------------------------------------------------------------------------------- /component-library/transform/ibm-sql-query.yaml: -------------------------------------------------------------------------------- 1 | name: ibm_sql_query 2 | description: Execute arbitrary SQL queries against CSV and PARQUET files using IBM Cloud SQL Query and Cloud Object Store 3 | 4 | inputs: 5 | - {name: api_key, type: String, description: 'IBM Cloud API key (alternative to token)'} 6 | - {name: token, type: String, description: 'IBM Cloud Token (alternative to API key)'} 7 | - {name: sql_query_crn, type: String, description: '(unique) Custom Resource Name (CRN) of IBM SQL Query Service'} 8 | - {name: out_format, type: String, description: 'default: CSV - (will be generated into according STORED AS … clause in the INTO clause)'} 9 | - {name: out_partition_columns, type: String, description: 'if set - will be generated into according PARTITIONED BY () clause in the INTO clause)'} 10 | - {name: out_number_of_objects, type: String, description: 'will be generated into according PARTITIONED INTO OBJECTS clause in INTO clause'} 11 | - {name: out_rows_per_object, type: String, description: 'will be generated into according PARTITIONED EVERY ROWS clause in INTO clause'} 12 | - {name: out_exact_name, type: String, description: 'default: False - only valid when no partitioning option is specified. Will be generated into sqlClient.rename_exact_result(jobid) after SQL has run.'} 13 | - {name: out_no_jobid_folder, type: String, description: 'default: False - will be generated into JOBPREFIX NONE in the INTO clause. 
Will cause results of previous runs with same output_uri to be overwritten, because no unique sub folder will be created for the result)'} 14 | - {name: sql, type: String, description: 'sql statement to execute ()'} 15 | 16 | 17 | outputs: 18 | - {name: output_uri, type: String, description: 'URI of resulting file (example: cos://s3.eu-de.cloud-object-storage.appdomain.cloud/cos-rkie-sqlquery-test/result)'} 19 | 20 | 21 | implementation: 22 | container: 23 | image: continuumio/anaconda3:2020.07 24 | command: 25 | - sh 26 | - -ec 27 | - | 28 | mkdir -p `echo $0 |sed -e 's/\/[a-zA-Z0-9]*$//'` 29 | wget https://raw.githubusercontent.com/IBM/claimed/master/component-library/transform/ibm-sql-query.ipynb 30 | ipython ibm-sql-query.ipynb output_uri="$0" api_key="$1" token="$2" sql_query_crn="$3" out_format="$4" out_partition_columns="$5" out_number_of_objects="$6" out_rows_per_object="$7" out_exact_name="$8" out_no_jobid_folder="$9" sql="$10" 31 | - {outputPath: output_uri} 32 | - {inputValue: api_key} 33 | - {inputValue: token} 34 | - {inputValue: sql_query_crn} 35 | - {inputValue: out_format} 36 | - {inputValue: out_partition_columns} 37 | - {inputValue: out_number_of_objects} 38 | - {inputValue: out_rows_per_object} 39 | - {inputValue: out_exact_name} 40 | - {inputValue: out_no_jobid_folder} 41 | - {inputValue: sql} 42 | -------------------------------------------------------------------------------- /component-library/transform/image-tiling-with-metadata_adjustment.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.2 2 | class: CommandLineTool 3 | 4 | baseCommand: "claimed" 5 | 6 | inputs: 7 | component: 8 | type: string 9 | default: docker.io/lorenzweingart/claimed-image-tiling-with-metadata-adjustment:0.1 10 | inputBinding: 11 | position: 1 12 | prefix: --component 13 | log_level: 14 | type: string 15 | default: "INFO" 16 | inputBinding: 17 | position: 2 18 | prefix: --log_level 19 | directory_path: 20 | type: string 21 | default: None 22 | inputBinding: 23 | position: 3 24 | prefix: --directory_path 25 | destination: 26 | type: string 27 | default: None 28 | inputBinding: 29 | position: 4 30 | prefix: --destination 31 | tile_size_x: 32 | type: int 33 | default: 64 34 | inputBinding: 35 | position: 5 36 | prefix: --tile_size_x 37 | tile_size_y: 38 | type: int 39 | default: 64 40 | inputBinding: 41 | position: 6 42 | prefix: --tile_size_y 43 | stride_x: 44 | type: int 45 | default: 32 46 | inputBinding: 47 | position: 7 48 | prefix: --stride_x 49 | stride_y: 50 | type: int 51 | default: 32 52 | inputBinding: 53 | position: 8 54 | prefix: --stride_y 55 | 56 | 57 | outputs: [] 58 | -------------------------------------------------------------------------------- /component-library/transform/image-tiling-with-metadata_adjustment.cwl:Zone.Identifier: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/transform/image-tiling-with-metadata_adjustment.cwl:Zone.Identifier -------------------------------------------------------------------------------- /component-library/transform/image-tiling-with-metadata_adjustment.ipynb:Zone.Identifier: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/transform/image-tiling-with-metadata_adjustment.ipynb:Zone.Identifier -------------------------------------------------------------------------------- /component-library/transform/image-tiling-with-metadata_adjustment.job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: image-tiling-with-metadata-adjustment 5 | spec: 6 | template: 7 | spec: 8 | containers: 9 | - name: image-tiling-with-metadata-adjustment 10 | image: docker.io/lorenzweingart/claimed-image-tiling-with-metadata-adjustment:0.1 11 | workingDir: /opt/app-root/src/ 12 | command: ["/opt/app-root/bin/ipython","claimed_image-tiling-with-metadata_adjustment.ipynb"] 13 | env: 14 | - name: log_level 15 | value: value_of_log_level 16 | - name: directory_path 17 | value: value_of_directory_path 18 | - name: destination 19 | value: value_of_destination 20 | - name: tile_size_x 21 | value: value_of_tile_size_x 22 | - name: tile_size_y 23 | value: value_of_tile_size_y 24 | - name: stride_x 25 | value: value_of_stride_x 26 | - name: stride_y 27 | value: value_of_stride_y 28 | restartPolicy: OnFailure 29 | imagePullSecrets: 30 | - name: image_pull_secret -------------------------------------------------------------------------------- /component-library/transform/image-tiling-with-metadata_adjustment.job.yaml:Zone.Identifier: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/transform/image-tiling-with-metadata_adjustment.job.yaml:Zone.Identifier -------------------------------------------------------------------------------- /component-library/transform/image-tiling-with-metadata_adjustment.yaml: -------------------------------------------------------------------------------- 1 | name: image-tiling-with-metadata-adjustment 2 | description: "## Xview Dataset clipping – CLAIMED V0.1" 3 | 4 | inputs: 5 | - {name: log_level, type: String, description: "update log level", default: "INFO"} 6 | - {name: directory_path, type: String, description: "source is the path to the folder with the unzipped .tif images from xview dataset"} 7 | - {name: destination, type: String, description: "destination is the path to the folder which saves all the extracted tiles."} 8 | - {name: tile_size_x, type: Integer, description: "Each image is cropped using a rectangular window with edge_length tile_size_x and tile_size_y which has to be given in number of pixels", default: "64"} 9 | - {name: tile_size_y, type: Integer, description: "", default: "64"} 10 | - {name: stride_x, type: Integer, description: "For tumbling window stride_x must equal tile_size_x and stride_y must equal tile_size_y", default: "32"} 11 | - {name: stride_y, type: Integer, description: "stride_y is the length in pixels the sliding window is moved down after completing a row", default: "32"} 12 | 13 | 14 | outputs: 15 | 16 | 17 | implementation: 18 | container: 19 | image: docker.io/lorenzweingart/claimed-image-tiling-with-metadata-adjustment:0.1 20 | command: 21 | - sh 22 | - -ec 23 | - | 24 | ipython ./claimed_image-tiling-with-metadata_adjustment.ipynb log_level="${0}" directory_path="${1}" destination="${2}" tile_size_x="${3}" tile_size_y="${4}" stride_x="${5}" stride_y="${6}" 25 | - {inputValue: log_level} 26 
| - {inputValue: directory_path} 27 | - {inputValue: destination} 28 | - {inputValue: tile_size_x} 29 | - {inputValue: tile_size_y} 30 | - {inputValue: stride_x} 31 | - {inputValue: stride_y} 32 | -------------------------------------------------------------------------------- /component-library/transform/image-tiling-with-metadata_adjustment.yaml:Zone.Identifier: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/transform/image-tiling-with-metadata_adjustment.yaml:Zone.Identifier -------------------------------------------------------------------------------- /component-library/transform/spark-condense-parquet.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/transform/spark-condense-parquet.yaml -------------------------------------------------------------------------------- /component-library/transform/spark-csv-to-parquet.yaml: -------------------------------------------------------------------------------- 1 | name: spark-csv-to-parquet 2 | description: Converts a CSV file with header to parquet using ApacheSpark 3 | 4 | inputs: 5 | - {name: data_csv, type: String, description: 'source path and file name (default: data.csv)'} 6 | - {name: master, type: String, description: 'url of master (default: local mode)'} 7 | - {name: data_dir, type: String, description: 'temporal data storage for local execution'} 8 | 9 | 10 | outputs: 11 | - {name: output_data_parquet, type: String, description: 'destination path and parquet file name (default: data.parquet)'} 12 | 13 | 14 | implementation: 15 | container: 16 | image: romeokienzler/claimed-spark-csv-to-parquet:0.1 17 | command: 18 | - sh 19 | - -ec 20 | - | 21 | python ./spark-csv-to-parquet.py output_data_parquet="$0" data_csv="$1" master="$2" data_dir="$3" 22 | - {outputPath: output_data_parquet} 23 | - {inputValue: data_csv} 24 | - {inputValue: master} 25 | - {inputValue: data_dir} 26 | -------------------------------------------------------------------------------- /component-library/transform/spark-json-to-parquet.yaml: -------------------------------------------------------------------------------- 1 | name: spark-json-to-parquet 2 | description: Converts a JSON file to parquet using ApacheSpark CLAIMED v0.2m 3 | 4 | inputs: 5 | - {name: data_json, type: String, description: 'source path and file name (default: data.csv)'} 6 | - {name: master, type: String, description: 'url of master (default: local mode)'} 7 | - {name: data_dir, type: String, description: 'temporal data storage for local execution'} 8 | 9 | 10 | outputs: 11 | - {name: output_data_parquet, type: String, description: 'destination path and parquet file name (default: data.parquet)'} 12 | 13 | 14 | implementation: 15 | container: 16 | image: romeokienzler/claimed-spark-json-to-parquet:0.2m 17 | command: 18 | - sh 19 | - -ec 20 | - | 21 | ipython ./spark-json-to-parquet.ipynb output_data_parquet="$0" data_json="$1" master="$2" data_dir="$3" 22 | - {outputPath: output_data_parquet} 23 | - {inputValue: data_json} 24 | - {inputValue: master} 25 | - {inputValue: data_dir} 26 | -------------------------------------------------------------------------------- /component-library/transform/spark-sql-interactive/Dockerfile: 
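The tile_size_x/y and stride_x/y parameters of the image-tiling-with-metadata_adjustment component above describe a sliding window over each image (a tumbling, non-overlapping window when stride equals tile size). A minimal NumPy sketch of that windowing logic, independent of the component's actual implementation (metadata adjustment and file I/O omitted):

import numpy as np

def tile_image(image, tile_size_x=64, tile_size_y=64, stride_x=32, stride_y=32):
    # Yield (x, y, tile) for every full window. stride == tile size gives a
    # tumbling (non-overlapping) window; smaller strides give overlapping tiles.
    height, width = image.shape[:2]
    for y in range(0, height - tile_size_y + 1, stride_y):
        for x in range(0, width - tile_size_x + 1, stride_x):
            yield x, y, image[y:y + tile_size_y, x:x + tile_size_x]

# With the defaults above, a 256x256 image yields a 7x7 grid of overlapping tiles.
tiles = list(tile_image(np.zeros((256, 256))))
assert len(tiles) == 49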
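The spark-csv-to-parquet and spark-json-to-parquet specs above wrap small Spark conversion jobs; a rough sketch of the CSV variant using the parameter names and defaults from the spec (paths are hypothetical, the component's real code lives in its own notebook/script):

import os
from pyspark.sql import SparkSession

data_dir = os.environ.get('data_dir', '../../data/')
data_csv = os.environ.get('data_csv', 'data.csv')
output_data_parquet = os.environ.get('output_data_parquet', 'data.parquet')
master = os.environ.get('master', 'local[*]')

# Read the headered CSV and rewrite it as parquet.
spark = SparkSession.builder.master(master).appName('spark-csv-to-parquet').getOrCreate()
df = spark.read.option('header', 'true').csv(data_dir + data_csv)
df.write.mode('overwrite').parquet(data_dir + output_data_parquet)
spark.stop()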
-------------------------------------------------------------------------------- 1 | FROM registry.access.redhat.com/ubi8/python-39 2 | USER root 3 | RUN dnf -y install java-1.8.0-openjdk maven 4 | USER default 5 | RUN wget https://dlcdn.apache.org/spark/spark-3.3.0/spark-3.3.0-bin-hadoop3.tgz 6 | RUN tar xvfz spark-3.3.0-bin-hadoop3.tgz 7 | ENV SPARK_HOME=/opt/app-root/src/spark-3.3.0-bin-hadoop3 8 | ENV PATH=$PATH:$SPARK_HOME/bin 9 | RUN git clone https://github.com/CODAIT/stocator.git 10 | RUN cd /opt/app-root/src/stocator && git checkout v1.1.5 11 | RUN cd /opt/app-root/src/stocator && mvn clean package -Pall-in-one 12 | RUN cd /opt/app-root/src/stocator && mvn dependency:tree 13 | #ADD spark-sql-interactive/stocator-1.1.5-jar-with-dependencies.jar . 14 | RUN pip install pyspark==3.3.0 apache-iceberg==0.0.2 ipython nbformat jupyter nbconvert ipykernel 15 | ADD spark-sql-interactive.ipynb . 16 | RUN jupyter nbconvert --to script spark-sql-interactive.ipynb 17 | CMD python -m ipykernel_launcher -f $DOCKERNEL_CONNECTION_FILE -------------------------------------------------------------------------------- /component-library/transform/spark-sql-interactive/app.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql import SparkSession 2 | 3 | #stocator_jar = '/home/romeokienzler/gitco/claimed/component-library/transform/spark-sql-interactive/stocator-1.1.5-jar-with-dependencies.jar' 4 | stocator_jar = '/opt/app-root/src/stocator/target/stocator-1.1.5-jar-with-dependencies.jar' 5 | spark_builder = ( 6 | SparkSession 7 | .builder 8 | .appName('test_app')) 9 | 10 | spark_builder.config('spark.driver.extraClassPath', stocator_jar) 11 | spark_builder.config('spark.executor.extraClassPath', stocator_jar) 12 | spark_builder.config('fs.cos.myCos.access.key', 'afcfd8cda8dd4b17856577ec654c42e7') 13 | spark_builder.config('fs.cos.myCos.secret.key', '') 14 | spark_builder.config('fs.cos.myCos.endpoint', 's3.eu-de.cloud-object-storage.appdomain.cloud') 15 | spark_builder.config("spark.hadoop.fs.stocator.scheme.list", "cos") 16 | spark_builder.config("spark.hadoop.fs.cos.impl", "com.ibm.stocator.fs.ObjectStoreFileSystem") 17 | spark_builder.config("fs.stocator.cos.impl", "com.ibm.stocator.fs.cos.COSAPIClient") 18 | spark_builder.config("fs.stocator.cos.scheme", "cos") 19 | 20 | spark = spark_builder.getOrCreate() 21 | df = spark.read.csv("cos://claimed-spark-interactive.myCos/chat-en_us.csv") 22 | df.createOrReplaceTempView('df') 23 | df = spark.sql('select * from df') 24 | #df.write.csv('cos://claimed-spark-interactive.myCos/chat-en_us3.csv') 25 | df.writeTo("ibdb").create() 26 | 27 | print("count:"+str(df.count())) 28 | -------------------------------------------------------------------------------- /component-library/transform/spark-sql-interactive/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker build -t spark-sql-interactive:`echo $version` -f spark-sql-interactive/Dockerfile . 
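# Note: this script expects $version and $repository in the environment,
# e.g. (hypothetical values) version=0.1 repository=docker.io/someuser ./build.sh;
# the lines below tag the freshly built image and push it to that repository.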
3 | docker tag spark-sql-interactive:`echo $version` `echo $repository`/spark-sql-interactive:`echo $version` 4 | docker push `echo $repository`/spark-sql-interactive:`echo $version` -------------------------------------------------------------------------------- /component-library/transform/spark-sql.yaml: -------------------------------------------------------------------------------- 1 | name: SparkSQL 2 | description: Execute arbitrary SQL queries againts CSV and PARQUET files 3 | 4 | inputs: 5 | - {name: data_file, type: String, description: 'file name for CSV or PARQUET file - must end with .csv or .parquet (default: data.csv)'} 6 | - {name: master, type: String, description: 'master url of spark master (default: local mode)'} 7 | - {name: data_dir, type: String, description: 'data_dir temporal data storage for local execution (default: ../../data/)'} 8 | - {name: sql, type: String, description: 'sql statement to execute, table name == df, example: select * from df'} 9 | 10 | 11 | outputs: 12 | - {name: output_result_file, type: String, description: 'name of resulting file (default: data_result.csv)'} 13 | 14 | 15 | implementation: 16 | container: 17 | image: romeokienzler/claimed-SparkSQL:0.1 18 | command: 19 | - sh 20 | - -ec 21 | - | 22 | python ./spark-sql.py output_result_file="$0" data_file="$1" master="$2" data_dir="$3" sql="$4" 23 | - {outputPath: output_result_file} 24 | - {inputValue: data_file} 25 | - {inputValue: master} 26 | - {inputValue: data_dir} 27 | - {inputValue: sql} 28 | -------------------------------------------------------------------------------- /component-library/transform/transform-apply.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "polished-click", 6 | "metadata": { 7 | "papermill": { 8 | "duration": 0.0052, 9 | "end_time": "2021-03-12T08:33:43.246663", 10 | "exception": false, 11 | "start_time": "2021-03-12T08:33:43.241463", 12 | "status": "completed" 13 | }, 14 | "tags": [] 15 | }, 16 | "source": [ 17 | "# Applies function on pandas data frame\n" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "id": "gentle-lightning", 24 | "metadata": { 25 | "papermill": { 26 | "duration": 28.520857, 27 | "end_time": "2021-03-12T08:34:11.772333", 28 | "exception": false, 29 | "start_time": "2021-03-12T08:33:43.251476", 30 | "status": "completed" 31 | }, 32 | "tags": [] 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "!pip3 install pandas==1.2.1" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "id": "unavailable-antenna", 43 | "metadata": { 44 | "papermill": { 45 | "duration": 0.267363, 46 | "end_time": "2021-03-12T08:34:12.062325", 47 | "exception": false, 48 | "start_time": "2021-03-12T08:34:11.794962", 49 | "status": "completed" 50 | }, 51 | "tags": [] 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "import os\n", 56 | "import pandas as pd" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "id": "generous-margin", 63 | "metadata": { 64 | "papermill": { 65 | "duration": 0.020275, 66 | "end_time": "2021-03-12T08:34:12.098601", 67 | "exception": false, 68 | "start_time": "2021-03-12T08:34:12.078326", 69 | "status": "completed" 70 | }, 71 | "tags": [] 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "# @param column where function needs to be applied\n", 76 | "# @param function to be applied on data element\n", 77 | "# @param file_name csv file name\n", 78 | 
"# @returns updated csv" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "id": "gross-click", 85 | "metadata": { 86 | "papermill": { 87 | "duration": 0.021922, 88 | "end_time": "2021-03-12T08:34:12.137016", 89 | "exception": false, 90 | "start_time": "2021-03-12T08:34:12.115094", 91 | "status": "completed" 92 | }, 93 | "tags": [] 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "column = os.environ.get('column')\n", 98 | "function = os.environ.get('function')\n", 99 | "file_name = os.environ.get('file_name', 'metadata.csv')" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "id": "sought-navigation", 106 | "metadata": { 107 | "papermill": { 108 | "duration": 0.086226, 109 | "end_time": "2021-03-12T08:34:12.240530", 110 | "exception": true, 111 | "start_time": "2021-03-12T08:34:12.154304", 112 | "status": "failed" 113 | }, 114 | "tags": [] 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "metadata = pd.read_csv(file_name)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "id": "apart-electricity", 125 | "metadata": { 126 | "papermill": { 127 | "duration": null, 128 | "end_time": null, 129 | "exception": null, 130 | "start_time": null, 131 | "status": "pending" 132 | }, 133 | "tags": [] 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "lf = 'dummy' # to make the compiler happy\n", 138 | "exec('lf = lambda x: x.' + function)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "id": "mexican-harvey", 145 | "metadata": { 146 | "papermill": { 147 | "duration": null, 148 | "end_time": null, 149 | "exception": null, 150 | "start_time": null, 151 | "status": "pending" 152 | }, 153 | "tags": [] 154 | }, 155 | "outputs": [], 156 | "source": [ 157 | "metadata[column] = metadata[column].apply(lf)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "id": "amended-blocking", 164 | "metadata": { 165 | "papermill": { 166 | "duration": null, 167 | "end_time": null, 168 | "exception": null, 169 | "start_time": null, 170 | "status": "pending" 171 | }, 172 | "tags": [] 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "metadata.to_csv(file_name, index=False)" 177 | ] 178 | } 179 | ], 180 | "metadata": { 181 | "kernelspec": { 182 | "display_name": "Python 3 (ipykernel)", 183 | "language": "python", 184 | "name": "python3" 185 | }, 186 | "language_info": { 187 | "codemirror_mode": { 188 | "name": "ipython", 189 | "version": 3 190 | }, 191 | "file_extension": ".py", 192 | "mimetype": "text/x-python", 193 | "name": "python", 194 | "nbconvert_exporter": "python", 195 | "pygments_lexer": "ipython3", 196 | "version": "3.9.6" 197 | }, 198 | "papermill": { 199 | "default_parameters": {}, 200 | "duration": 31.039908, 201 | "end_time": "2021-03-12T08:34:13.465890", 202 | "environment_variables": {}, 203 | "exception": true, 204 | "input_path": "/home/jovyan/work/examples/pipelines/claimed_covid_ct_trusted_ai/component-library/transform/transform-apply.ipynb", 205 | "output_path": "/home/jovyan/work/examples/pipelines/claimed_covid_ct_trusted_ai/component-library/transform/transform-apply.ipynb", 206 | "parameters": {}, 207 | "start_time": "2021-03-12T08:33:42.425982", 208 | "version": "2.3.2" 209 | } 210 | }, 211 | "nbformat": 4, 212 | "nbformat_minor": 5 213 | } 214 | -------------------------------------------------------------------------------- /component-library/transform/transform-images.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "tired-hands", 6 | "metadata": {}, 7 | "source": [ 8 | "# Transforms image folder structure\n", 9 | "Using an image source folder and metadata rearrage the folder structure such that each subfolder is a category containing the images of that category" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "id": "generous-grammar", 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "!pip3 install pandas==1.2.1 wget==3.2" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "id": "independent-blind", 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import wget\n", 30 | "wget.download(\n", 31 | " 'https://raw.githubusercontent.com/'\n", 32 | " 'elyra-ai/component-library/master/claimed_utils.py'\n", 33 | ")" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "id": "sorted-carbon", 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "import os\n", 44 | "from claimed_utils import unzip, zipdir\n", 45 | "import pandas as pd\n", 46 | "from shutil import copyfile" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "id": "flexible-shame", 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "# @dependency codait_utils.ipynb\n", 57 | "# @param metadata csv file containing metadata on the images\n", 58 | "# @param source_folder containing the images\n", 59 | "# @param target_column name on the metadata containing the target (class) value\n", 60 | "# @param image_column name on the metadata containing\n", 61 | "# the file name to the image\n", 62 | "# @param output_folder name of the folder where to put the images to\n", 63 | "# @param images_zip file name containing the orginital images\n", 64 | "# @param data_zip file name where the images are stored into\n", 65 | "# @param data folder name\n", 66 | "# @returns" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "id": "united-bouquet", 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "metadata = os.environ.get('metadata', 'metadata.csv')\n", 77 | "source_folder = os.environ.get('source_folder', 'images')\n", 78 | "target_column = os.environ.get('target_column', 'finding')\n", 79 | "image_column = os.environ.get('image_column', 'filename')\n", 80 | "output_folder = os.environ.get('output_folder', 'data')\n", 81 | "images_zip = os.environ.get('images_zip', 'images.zip')\n", 82 | "data_zip = os.environ.get('data_zip', 'data.zip')\n", 83 | "data = os.environ.get('data', 'data')" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "id": "bridal-pilot", 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "unzip('.', images_zip)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "id": "tested-function", 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "metadata = pd.read_csv(metadata)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "id": "heated-arkansas", 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "os.mkdir(output_folder)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "id": "drawn-equation", 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "folders = metadata[target_column].unique()" 
124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "id": "separate-framing", 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "for folder in folders:\n", 134 | " os.mkdir(output_folder + '/' + folder)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "id": "korean-shield", 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "for index, row in metadata.iterrows():\n", 145 | " file_name = row[image_column]\n", 146 | " class_name = row[target_column]\n", 147 | " copyfile(\n", 148 | " source_folder + '/' + file_name, output_folder + '/' + class_name + '/' + file_name\n", 149 | " )" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "id": "realistic-listening", 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "!tree data" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "id": "bound-wells", 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "zipdir(data_zip, data)" 170 | ] 171 | } 172 | ], 173 | "metadata": { 174 | "kernelspec": { 175 | "display_name": "Python 3 (ipykernel)", 176 | "language": "python", 177 | "name": "python3" 178 | }, 179 | "language_info": { 180 | "codemirror_mode": { 181 | "name": "ipython", 182 | "version": 3 183 | }, 184 | "file_extension": ".py", 185 | "mimetype": "text/x-python", 186 | "name": "python", 187 | "nbconvert_exporter": "python", 188 | "pygments_lexer": "ipython3", 189 | "version": "3.9.6" 190 | } 191 | }, 192 | "nbformat": 4, 193 | "nbformat_minor": 5 194 | } 195 | -------------------------------------------------------------------------------- /component-library/transform/transform-images.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/transform/transform-images.yaml -------------------------------------------------------------------------------- /component-library/util/sparksql-interactive.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "id": "d00fc083", 7 | "metadata": { 8 | "papermill": { 9 | "duration": 0.002622, 10 | "end_time": "2022-10-26T08:27:02.102770", 11 | "exception": false, 12 | "start_time": "2022-10-26T08:27:02.100148", 13 | "status": "completed" 14 | }, 15 | "tags": [] 16 | }, 17 | "source": [ 18 | "# sparksql-interactive" 19 | ] 20 | }, 21 | { 22 | "attachments": {}, 23 | "cell_type": "markdown", 24 | "id": "645dc2c4", 25 | "metadata": { 26 | "papermill": { 27 | "duration": 0.004143, 28 | "end_time": "2022-10-26T08:27:02.111011", 29 | "exception": false, 30 | "start_time": "2022-10-26T08:27:02.106868", 31 | "status": "completed" 32 | }, 33 | "tags": [] 34 | }, 35 | "source": [ 36 | "This component creates a container image for running interactive SparkSQL queries on the CLI" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "id": "4e59fe38", 43 | "metadata": { 44 | "papermill": { 45 | "duration": 2.308259, 46 | "end_time": "2022-10-26T08:27:04.423791", 47 | "exception": false, 48 | "start_time": "2022-10-26T08:27:02.115532", 49 | "status": "completed" 50 | }, 51 | "scrolled": true, 52 | "tags": [] 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "import os\n", 57 | "\n", 58 | 
"os.environ['create_image']='True'\n", 59 | "os.environ['repository']='docker.io/romeokienzler'\n", 60 | "os.environ['version']='0.3'" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "id": "dde74e6c", 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "if bool(os.environ.get('create_image',False)):\n", 71 | " docker_file=\"\"\"\n", 72 | " FROM debian\n", 73 | " RUN apt update && apt install -y python3-pip procps default-jre\n", 74 | " RUN pip install ipython nbformat pyspark\n", 75 | " \"\"\"\n", 76 | " with open(\"Dockerfile\", \"w\") as text_file:\n", 77 | " text_file.write(docker_file)\n", 78 | "\n", 79 | " !docker build -t claimed-sparksql-interactive:`echo $version` .\n", 80 | " !docker tag claimed-sparksql-interactive:`echo $version` `echo $repository`/claimed-sparksql-interactive:`echo $version`\n", 81 | " !docker push `echo $repository`/claimed-sparksql-interactive:`echo $version`\n", 82 | " !rm Dockerfile" 83 | ] 84 | } 85 | ], 86 | "metadata": { 87 | "kernelspec": { 88 | "display_name": "Python 3", 89 | "language": "python", 90 | "name": "python3" 91 | }, 92 | "language_info": { 93 | "codemirror_mode": { 94 | "name": "ipython", 95 | "version": 3 96 | }, 97 | "file_extension": ".py", 98 | "mimetype": "text/x-python", 99 | "name": "python", 100 | "nbconvert_exporter": "python", 101 | "pygments_lexer": "ipython3", 102 | "version": "3.11.1" 103 | }, 104 | "papermill": { 105 | "default_parameters": {}, 106 | "duration": 5.223558, 107 | "end_time": "2022-10-26T08:27:05.697642", 108 | "environment_variables": {}, 109 | "exception": null, 110 | "input_path": "/home/romeokienzler/gitco/claimed/component-library/input/input-url.ipynb", 111 | "output_path": "/home/romeokienzler/gitco/claimed/component-library/input/input-url.ipynb", 112 | "parameters": {}, 113 | "start_time": "2022-10-26T08:27:00.474084", 114 | "version": "2.3.3" 115 | }, 116 | "vscode": { 117 | "interpreter": { 118 | "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" 119 | } 120 | } 121 | }, 122 | "nbformat": 4, 123 | "nbformat_minor": 5 124 | } 125 | -------------------------------------------------------------------------------- /component-library/util/util-cos.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.2 2 | class: CommandLineTool 3 | 4 | baseCommand: "claimed" 5 | 6 | inputs: 7 | component: 8 | type: string 9 | default: docker.io/romeokienzler/claimed-util-cos:0.39 10 | inputBinding: 11 | position: 1 12 | prefix: --component 13 | log_level: 14 | type: string 15 | default: "INFO" 16 | inputBinding: 17 | position: 2 18 | prefix: --log_level 19 | cos_connection: 20 | type: string 21 | default: None 22 | inputBinding: 23 | position: 3 24 | prefix: --cos_connection 25 | local_path: 26 | type: string 27 | default: None 28 | inputBinding: 29 | position: 4 30 | prefix: --local_path 31 | recursive: 32 | type: bool 33 | default: False 34 | inputBinding: 35 | position: 5 36 | prefix: --recursive 37 | operation: 38 | type: string 39 | default: None 40 | inputBinding: 41 | position: 6 42 | prefix: --operation 43 | 44 | 45 | outputs: [] 46 | -------------------------------------------------------------------------------- /component-library/util/util-cos.job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: util-cos 5 | spec: 6 | template: 7 | spec: 8 | containers: 9 | - name: util-cos 10 | image: 
docker.io/romeokienzler/claimed-util-cos:0.39 11 | workingDir: /opt/app-root/src/ 12 | command: ["/opt/app-root/bin/ipython","claimed_util-cos.ipynb"] 13 | env: 14 | - name: log_level 15 | value: value_of_log_level 16 | - name: cos_connection 17 | value: value_of_cos_connection 18 | - name: local_path 19 | value: value_of_local_path 20 | - name: recursive 21 | value: value_of_recursive 22 | - name: operation 23 | value: value_of_operation 24 | restartPolicy: OnFailure 25 | imagePullSecrets: 26 | - name: image_pull_secret -------------------------------------------------------------------------------- /component-library/util/util-cos.py: -------------------------------------------------------------------------------- 1 | """ 2 | COS utility functions 3 | """ 4 | 5 | # pip install aiobotocore botocore s3fs claimed-c3 tqdm 6 | 7 | import os 8 | import s3fs 9 | import logging 10 | import tqdm 11 | from c3.operator_utils import explode_connection_string 12 | 13 | # cos_connection in format: [cos|s3]://access_key_id:secret_access_key@endpoint/bucket/path 14 | cos_connection = os.environ.get('cos_connection', None) 15 | 16 | # access key id (if cos_connection is not provided) 17 | access_key_id = os.environ.get('access_key_id', None) 18 | 19 | # secret access key (if cos_connection is not provided) 20 | secret_access_key = os.environ.get('secret_access_key', None) 21 | 22 | # cos/s3 endpoint (if cos_connection is not provided) 23 | endpoint = os.environ.get('endpoint', None) 24 | 25 | # cos bucket name (if cos_connection is not provided) 26 | bucket_name = os.environ.get('bucket_name', None) 27 | 28 | # cos path (if cos_connection is not provided) 29 | cos_path = os.environ.get('cos_path', None) 30 | 31 | # local path 32 | local_path = os.environ.get('local_path') 33 | 34 | # recursive 35 | recursive = bool(os.environ.get('recursive', 'True')) 36 | 37 | # operation (mkdir|ls|find|download|upload|rm|sync_to_cos|sync_to_local|glob) 38 | operation = os.environ.get('operation') 39 | 40 | # Extract values from connection string 41 | if cos_connection is not None: 42 | (access_key_id, secret_access_key, endpoint, cos_path) = explode_connection_string(cos_connection) 43 | else: 44 | cos_path = os.path.join(bucket_name, cos_path) 45 | 46 | assert access_key_id is not None and secret_access_key is not None and endpoint is not None and cos_path is not None, \ 47 | "Provide a cos_connection (s3://access_key_id:secret_access_key@endpoint/bucket/path) or each value separatly." 
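# Example (hypothetical values): a cos_connection such as
#   s3://MYACCESSKEY:MYSECRET@s3.eu-de.cloud-object-storage.appdomain.cloud/my-bucket/some/prefix
# is expected to yield access_key_id='MYACCESSKEY', secret_access_key='MYSECRET',
# the endpoint URL and cos_path='my-bucket/some/prefix' (the exact split is done by
# c3.operator_utils.explode_connection_string).
# Note: `recursive` above is parsed with bool() on the raw string, so any non-empty
# value (including the string 'False') evaluates to True; only an empty string
# disables recursion.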
48 | 49 | 50 | def main(): 51 | def print_list(l): 52 | for file in l: 53 | print(file) 54 | 55 | s3 = s3fs.S3FileSystem( 56 | anon=False, 57 | key=access_key_id, 58 | secret=secret_access_key, 59 | client_kwargs={'endpoint_url': endpoint} 60 | ) 61 | 62 | if operation == 'mkdir': 63 | logging.info('Make directory ' + cos_path) 64 | s3.mkdir(cos_path) 65 | elif operation == 'ls': 66 | logging.info('List path ' + cos_path) 67 | print_list(s3.ls(cos_path)) 68 | elif operation == 'find': 69 | logging.info('Find path ' + cos_path) 70 | print_list(s3.find(cos_path)) 71 | elif operation == 'upload' and not recursive: 72 | logging.info('Put path ' + cos_path) 73 | print(s3.put(local_path,cos_path)) 74 | elif operation == 'download' and not recursive: 75 | logging.info('Get path ' + cos_path) 76 | s3.get(cos_path, local_path) 77 | elif operation == 'rm': 78 | logging.info('Remove path ' + cos_path) 79 | s3.rm(cos_path, recursive=recursive) 80 | elif operation == 'glob': 81 | logging.info('Glob path ' + cos_path) 82 | print_list(s3.glob(cos_path)) 83 | elif operation == 'sync_to_cos' or operation == 'upload': 84 | logging.info(f'{operation} {local_path} to {cos_path}') 85 | for root, dirs, files in os.walk(local_path, topdown=False): 86 | # Sync files in current folder 87 | for name in tqdm.tqdm(files, desc=root): 88 | file = os.path.join(root, name) 89 | logging.debug(f'processing {file}') 90 | cos_file = os.path.join(cos_path, 91 | os.path.relpath(root, local_path), name).replace('/./', '/') 92 | if operation == 'sync_to_cos' and s3.exists(cos_file): 93 | logging.debug(f'exists {cos_file}') 94 | logging.debug(f's3.info {s3.info(cos_file)}') 95 | if s3.info(cos_file)['size'] != os.path.getsize(file): 96 | logging.debug(f'uploading {file} to {cos_file}') 97 | s3.put(file, cos_file) 98 | else: 99 | logging.debug(f'skipping {file}') 100 | else: 101 | logging.debug(f'uploading {file} to {cos_file}') 102 | s3.put(file, cos_file) 103 | elif operation == 'sync_to_local' or operation == 'download': 104 | logging.info(f'{operation} {cos_path} to {local_path}') 105 | for root, dirs, files in s3.walk(cos_path): 106 | # Sync directories in current folder 107 | for name in dirs: 108 | local_dir = os.path.join(local_path, os.path.relpath(root, cos_path), 109 | name).replace('/./', '/') 110 | if not os.path.isdir(local_dir): 111 | logging.debug(f'create dir {local_dir}') 112 | os.makedirs(local_dir, exist_ok=True, parents=True) 113 | # Sync files in current folder 114 | for name in tqdm.tqdm(files, desc=root): 115 | cos_file = os.path.join(root, name) 116 | local_file = os.path.join(local_path, os.path.relpath(root, cos_path), 117 | name).replace('/./', '/') 118 | logging.debug(f'processing {cos_file}') 119 | if operation == 'sync_to_local' and os.path.isfile(local_file): 120 | logging.debug(f'exists {local_file}') 121 | logging.debug(f's3.info {s3.info(cos_file)}') 122 | if s3.info(cos_file)['size'] != os.path.getsize(local_file): 123 | logging.debug(f'downloading {cos_file} to {local_file}') 124 | s3.get(cos_file, local_file) 125 | else: 126 | logging.info(f'Skipping {cos_file}') 127 | else: 128 | logging.debug(f'downloading {cos_file} to {local_file}') 129 | s3.get(cos_file, local_file) 130 | else: 131 | logging.error(f'Operation unkonwn {operation}') 132 | 133 | 134 | if __name__ == '__main__': 135 | main() 136 | -------------------------------------------------------------------------------- /component-library/util/util-cos.yaml: 
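util-cos.py above is driven entirely by environment variables; a minimal local smoke test (credentials, endpoint and bucket below are placeholders), run via runpy so the hyphenated file name does not need to be an importable module:

import os
import runpy

os.environ['operation'] = 'ls'
os.environ['cos_connection'] = 's3://ACCESS_KEY:SECRET_KEY@s3.example.com/my-bucket/raw/'
os.environ['local_path'] = '/tmp/claimed-data'        # only used for upload/download/sync
runpy.run_path('util-cos.py', run_name='__main__')    # prints the object listing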
-------------------------------------------------------------------------------- 1 | name: util-cos 2 | description: "# util-cos This component provides COS utility functions (e.g. creating a bucket, listing contents of a bucket) – CLAIMED V0.1" 3 | 4 | inputs: 5 | - {name: log_level, type: String, description: "update log level", default: "INFO"} 6 | - {name: cos_connection, type: String, description: "cos_connection in format: [cos|s3]://access_key_id:secret_access_key@endpoint/bucket/path"} 7 | - {name: local_path, type: String, description: "local_path for uploads, downloads, sync"} 8 | - {name: recursive, type: Boolean, description: "recursive", default: "False'"} 9 | - {name: operation, type: String, description: "operation (mkdir|ls|find|get|put|rm|sync_to_cos|sync_to_local|glob)"} 10 | 11 | 12 | outputs: 13 | 14 | 15 | implementation: 16 | container: 17 | image: docker.io/romeokienzler/claimed-util-cos:0.39 18 | command: 19 | - sh 20 | - -ec 21 | - | 22 | ipython ./claimed_util-cos.ipynb log_level="${0}" cos_connection="${1}" local_path="${2}" recursive="${3}" operation="${4}" 23 | - {inputValue: log_level} 24 | - {inputValue: cos_connection} 25 | - {inputValue: local_path} 26 | - {inputValue: recursive} 27 | - {inputValue: operation} 28 | -------------------------------------------------------------------------------- /component-library/visualize/map-from-coordinates.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "killing-amsterdam", 6 | "metadata": {}, 7 | "source": [ 8 | "# Draws a map based on long/lat data and an associated float\n", 9 | "\n", 10 | "CSV data format requirement: longitude, latitude, value\n", 11 | "\n", 12 | "WARNING: This component currently only supports local execution (not Kubeflow/Airflow) \n" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "id": "legislative-database", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "# @param data_dir temporal data storage for local execution\n", 23 | "# @param data_csv path and csv file name\n", 24 | "# @param master url of master (default: local mode)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "id": "tribal-newcastle", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "!pip3 install pyspark==3.1.1 folium==0.12.1" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "id": "informed-bonus", 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "import os\n", 45 | "import pandas as pd\n", 46 | "import folium\n", 47 | "from folium.plugins import HeatMap" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "id": "encouraging-islam", 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "data_parquet = os.environ.get('data_parquet', 'data.parquet')\n", 58 | "master = os.environ.get('master', \"local[*]\")\n", 59 | "data_csv = os.environ.get('data_csv', 'data.csv')\n", 60 | "data_dir = os.environ.get('data_dir', '../../data/')" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "id": "accessible-welsh", 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "data_csv = 'trends.csv'" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "id": "systematic-aruba", 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "df = pd.read_csv(data_dir + data_csv)" 81 | ] 82 | 
}, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "id": "devoted-advertising", 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "data = df.reset_index(drop=True).values.tolist()" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "id": "correct-example", 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "m = folium.Map([48.0, 5.0], zoom_start=6)\n", 101 | "HeatMap(data).add_to(m)\n", 102 | "m" 103 | ] 104 | } 105 | ], 106 | "metadata": { 107 | "kernelspec": { 108 | "display_name": "Python 3", 109 | "language": "python", 110 | "name": "python3" 111 | }, 112 | "language_info": { 113 | "codemirror_mode": { 114 | "name": "ipython", 115 | "version": 3 116 | }, 117 | "file_extension": ".py", 118 | "mimetype": "text/x-python", 119 | "name": "python", 120 | "nbconvert_exporter": "python", 121 | "pygments_lexer": "ipython3", 122 | "version": "3.8.6" 123 | } 124 | }, 125 | "nbformat": 4, 126 | "nbformat_minor": 5 127 | } 128 | -------------------------------------------------------------------------------- /component-library/visualize/timeseries-runchart.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "killing-amsterdam", 6 | "metadata": {}, 7 | "source": [ 8 | "# visualize-timeseries-runchart" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "f3cba135-fe00-45b1-b69d-163019e7c820", 14 | "metadata": {}, 15 | "source": [ 16 | "Plots a run chart of a time series (x time, y value)" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "id": "tribal-newcastle", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "!pip3 install pandas==1.3.2 wget==3.2" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "id": "informed-bonus", 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "import os\n", 37 | "import pandas as pd\n", 38 | "import json\n", 39 | "import logging\n", 40 | "import re\n", 41 | "import sys" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "encouraging-islam", 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "# CSV file to visualize\n", 52 | "data_csv = os.environ.get('data_csv', 'data.csv')\n", 53 | "\n", 54 | "# x - axis column name\n", 55 | "x_column = os.environ.get('x_column')\n", 56 | "\n", 57 | "# y - axis column name\n", 58 | "y_column = os.environ.get('y_column')\n", 59 | "\n", 60 | "# temporal data storage for local execution\n", 61 | "data_dir = os.environ.get('data_dir', '../../data/')" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "id": "387f9744-4ebd-4763-885d-38f8158466b6", 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "parameters = list(\n", 72 | " map(lambda s: re.sub('$', '\"', s),\n", 73 | " map(\n", 74 | " lambda s: s.replace('=', '=\"'),\n", 75 | " filter(\n", 76 | " lambda s: s.find('=') > -1 and bool(re.match(r'[A-Za-z0-9_]*=[.\\/A-Za-z0-9]*', s)),\n", 77 | " sys.argv\n", 78 | " )\n", 79 | " )))\n", 80 | "\n", 81 | "for parameter in parameters:\n", 82 | " logging.warning('Parameter: ' + parameter)\n", 83 | " exec(parameter)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "id": "systematic-aruba", 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "df = pd.read_csv(data_dir + data_csv)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | 
"execution_count": null, 99 | "id": "devoted-advertising", 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "df.plot(x=x_column, y=y_column)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "id": "d479c7d3-0f2c-4188-8100-63c4c5ffa5c6", 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "metadata = {\"outputs\": [{\"storage\": \"inline\", \"source\": \"# TEST AUC:xxxxxx \\n\\n#GBDT AUC:xxxxxxx\", \"type\": \"markdown\"}]}\n", 114 | "metadata = json.dumps(metadata)\n", 115 | "with open(\"/mlpipeline-ui-metadata.json\", \"w\") as text_file:\n", 116 | " text_file.write(metadata)" 117 | ] 118 | } 119 | ], 120 | "metadata": { 121 | "kernelspec": { 122 | "display_name": "Python 3", 123 | "language": "python", 124 | "name": "python3" 125 | }, 126 | "language_info": { 127 | "codemirror_mode": { 128 | "name": "ipython", 129 | "version": 3 130 | }, 131 | "file_extension": ".py", 132 | "mimetype": "text/x-python", 133 | "name": "python", 134 | "nbconvert_exporter": "python", 135 | "pygments_lexer": "ipython3", 136 | "version": "3.6.8" 137 | } 138 | }, 139 | "nbformat": 4, 140 | "nbformat_minor": 5 141 | } 142 | -------------------------------------------------------------------------------- /component-library/visualize/timeseries-runchart.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claimed-framework/component-library/3176754f8d6bcdda34d9577f82ee6e120f42877e/component-library/visualize/timeseries-runchart.yaml -------------------------------------------------------------------------------- /component-library/visualize/visualize-with-quickchart.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "#### Visualize data on quick-chart\n", 9 | "This notebook uses the [QuickChart](https://quickchart.io/) library to visualize data\n", 10 | "- configure the chart data\n", 11 | "- get chart as image\n", 12 | "- show and save the chart as jpg file" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "!pip install quickchart.io" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from quickchart import QuickChart\n", 31 | "from PIL import Image\n", 32 | "import requests\n", 33 | "from io import BytesIO\n", 34 | "import matplotlib.pyplot as plt\n", 35 | "import os\n", 36 | "import logging\n", 37 | "import sys\n", 38 | "import re" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# Input parameters, chart type\n", 48 | "chart_type = os.environ.get('chart_type')\n", 49 | "\n", 50 | "# chart data \n", 51 | "chart_data = os.environ.get('chart_data')\n", 52 | "\n", 53 | "# chart labels\n", 54 | "chart_labels = os.environ.get('chart_labels')\n", 55 | "\n", 56 | "# chart title\n", 57 | "chart_title = os.environ.get('chart_title')\n", 58 | "\n", 59 | "#chart height\n", 60 | "chart_height = os.environ.get('chart_height')\n", 61 | "\n", 62 | "#chart width\n", 63 | "chart_width = os.environ.get('chart_width')\n", 64 | "\n", 65 | "# path and file name for output\n", 66 | "output_data_image = os.environ.get('output_data_image')\n" 67 | ] 68 | }, 69 | { 70 | 
"cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "parameters = list(\n", 76 | " map(lambda s: re.sub('$', '\"', s),\n", 77 | " map(\n", 78 | " lambda s: s.replace('=', '=\"'),\n", 79 | " filter(\n", 80 | " lambda s: s.find('=') > -1 and bool(re.match(r'[A-Za-z0-9_]*=[.\\/A-Za-z0-9]*', s)),\n", 81 | " sys.argv\n", 82 | " )\n", 83 | " )))\n", 84 | "\n", 85 | "for parameter in parameters:\n", 86 | " logging.warning('Parameter: ' + parameter)\n", 87 | " exec(parameter)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "# Define your chart data configuration\n", 97 | "qc = QuickChart()\n", 98 | "qc.height = chart_height\n", 99 | "qc.width = chart_width\n", 100 | "qc.config = {\n", 101 | " \"type\": chart_type,\n", 102 | " \"data\": {\n", 103 | " \"labels\": chart_labels,\n", 104 | " \"datasets\": [{\n", 105 | " \"label\": chart_title,\n", 106 | " \"data\": chart_data\n", 107 | " }]\n", 108 | " }\n", 109 | "}" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "# print the chart url\n", 119 | "print(qc.get_short_url())" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "# Show the chart in this notebook, send a GET request to the image URL and retrieve the image\n", 129 | "response = requests.get(qc.get_short_url())\n", 130 | "image = Image.open(BytesIO(response.content))\n", 131 | "\n", 132 | "# Display the image\n", 133 | "plt.imshow(image)\n", 134 | "plt.axis('off')\n", 135 | "plt.show()" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "#To save the image, convert the image to the 'RGB' mode if necessary\n", 145 | "if image.mode != 'RGB':\n", 146 | " image = image.convert('RGB')\n", 147 | " \n", 148 | "# Save the image to the specified file\n", 149 | "image.save(output_data_image)\n", 150 | "\n", 151 | "print(\"Image saved successfully!\")" 152 | ] 153 | } 154 | ], 155 | "metadata": { 156 | "kernelspec": { 157 | "display_name": "base", 158 | "language": "python", 159 | "name": "python3" 160 | }, 161 | "language_info": { 162 | "codemirror_mode": { 163 | "name": "ipython", 164 | "version": 3 165 | }, 166 | "file_extension": ".py", 167 | "mimetype": "text/x-python", 168 | "name": "python", 169 | "nbconvert_exporter": "python", 170 | "pygments_lexer": "ipython3", 171 | "version": "3.10.9" 172 | }, 173 | "orig_nbformat": 4 174 | }, 175 | "nbformat": 4, 176 | "nbformat_minor": 2 177 | } 178 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='claimed-component-library', 4 | version='0.1', 5 | description='CLAIMED component library', 6 | url='https://github.com/claimed-framework/component-library', 7 | author='The CLAIMED authors', 8 | author_email='claimed-framework@proton.me', 9 | license='Apache2', 10 | packages=['component-library'], 11 | zip_safe=False) 12 | -------------------------------------------------------------------------------- /test_requirements.txt: -------------------------------------------------------------------------------- 1 | ipython==8.7.0 2 | 
nbformat==5.7.0 3 | --------------------------------------------------------------------------------
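Several of the visualization notebooks above (timeseries-runchart, visualize-with-quickchart) share the same idiom for overriding their environment-variable parameters from command-line arguments; a standalone sketch of what that filter/map/exec pipeline does, with hypothetical arguments:

import logging
import re

argv = ['ipython', 'timeseries-runchart.ipynb', 'x_column=date', 'y_column=value']

# Keep only name=value tokens, quote the value, then execute the assignment.
parameters = list(
    map(lambda s: re.sub('$', '"', s),
        map(lambda s: s.replace('=', '="'),
            filter(lambda s: s.find('=') > -1 and
                             bool(re.match(r'[A-Za-z0-9_]*=[.\/A-Za-z0-9]*', s)),
                   argv))))

for parameter in parameters:        # e.g. 'x_column="date"'
    logging.warning('Parameter: ' + parameter)
    exec(parameter)                 # assigns x_column = "date", y_column = "value"

print(x_column, y_column)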