├── figures
│   ├── overview.jpeg
│   ├── How datasets are managed.png
│   └── batch evaluation pipeline.png
├── notebooks
│   ├── test-images
│   │   ├── bird_0000.jpg
│   │   ├── cat_0000.jpg
│   │   ├── deer_0000.jpg
│   │   ├── dog_0000.jpg
│   │   ├── frog_0000.jpg
│   │   ├── ship_0000.jpg
│   │   ├── horse_0000.jpg
│   │   ├── truck_0000.jpg
│   │   ├── airplane_0000.jpg
│   │   └── automobile_0000.jpg
│   ├── README.md
│   ├── 98_Batch_Prediction_Test.ipynb
│   ├── 04_Cloud_Scheduler_Trigger.ipynb
│   ├── 02_TFX_Training_Pipeline.ipynb
│   └── 01_Dataset_Prep.ipynb
├── .github
│   └── workflows
│       └── lint.yml
├── custom_components
│   ├── file_list_gen.py
│   ├── training_pipeline_trigger.py
│   ├── batch_pred_evaluator.py
│   ├── batch_prediction_vertex.py
│   └── span_preparator.py
├── README.md
└── LICENSE
/figures/overview.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/figures/overview.jpeg --------------------------------------------------------------------------------
/notebooks/test-images/bird_0000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/bird_0000.jpg --------------------------------------------------------------------------------
/notebooks/test-images/cat_0000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/cat_0000.jpg --------------------------------------------------------------------------------
/notebooks/test-images/deer_0000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/deer_0000.jpg --------------------------------------------------------------------------------
/notebooks/test-images/dog_0000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/dog_0000.jpg --------------------------------------------------------------------------------
/notebooks/test-images/frog_0000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/frog_0000.jpg --------------------------------------------------------------------------------
/notebooks/test-images/ship_0000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/ship_0000.jpg --------------------------------------------------------------------------------
/figures/How datasets are managed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/figures/How datasets are managed.png --------------------------------------------------------------------------------
/figures/batch evaluation pipeline.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/figures/batch evaluation pipeline.png -------------------------------------------------------------------------------- /notebooks/test-images/horse_0000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/horse_0000.jpg -------------------------------------------------------------------------------- /notebooks/test-images/truck_0000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/truck_0000.jpg -------------------------------------------------------------------------------- /notebooks/test-images/airplane_0000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/airplane_0000.jpg -------------------------------------------------------------------------------- /notebooks/test-images/automobile_0000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/automobile_0000.jpg -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: black-action 2 | on: [push, pull_request] 3 | jobs: 4 | linter_name: 5 | name: runner / black 6 | runs-on: ubuntu-latest 7 | steps: 8 | - uses: actions/checkout@v2 9 | - name: Check files using the black formatter 10 | uses: rickstaa/action-black@v1 11 | id: action_black 12 | with: 13 | black_args: ". --check --diff" 14 | - name: Annotate diff changes using reviewdog 15 | if: steps.action_black.outputs.is_formatted == 'true' 16 | uses: reviewdog/action-suggester@v1 17 | with: 18 | tool_name: blackfmt -------------------------------------------------------------------------------- /custom_components/file_list_gen.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate a txt file in the format required by Vertex AI's Batch Prediction. 3 | There are a few options, and this component generates a "file list" formatted txt.
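(For reference, the generated "file list" txt simply holds one GCS object path per line, e.g. gs://<bucket>/<prefix>/airplane_0000.jpg; the bucket and prefix here are illustrative placeholders.)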
4 | (https://cloud.google.com/vertex-ai/docs/predictions/batch-predictions) 5 | """ 6 | 7 | import tensorflow as tf 8 | from absl import logging 9 | 10 | from tfx.dsl.component.experimental.decorators import component 11 | from tfx.dsl.component.experimental.annotations import Parameter, OutputArtifact 12 | from tfx.types.standard_artifacts import String 13 | 14 | 15 | @component 16 | def FileListGen( 17 | outpath: OutputArtifact[String], 18 | gcs_src_bucket: Parameter[str], 19 | gcs_src_prefix: Parameter[str] = "", 20 | output_filename: Parameter[str] = "test-images.txt", 21 | ): 22 | """ 23 | : param outpath: OutputArtifact to hold where output_filename will be located. 24 | This will be used in the downstream component, BatchPredictionGen. 25 | : param gcs_src_bucket: GCS bucket name where the list of raw data is 26 | : param gcs_src_prefix: prefix to be added to gcs_src_bucket 27 | : param output_filename: output filename whose content is a list of file paths of raw data 28 | """ 29 | logging.info("FileListGen started") 30 | 31 | # 1. get the list of data 32 | gcs_src_prefix = ( 33 | f"{gcs_src_prefix}/" if len(gcs_src_prefix) != 0 else gcs_src_prefix 34 | ) 35 | img_paths = tf.io.gfile.glob(f"gs://{gcs_src_bucket}/{gcs_src_prefix}*.jpg") 36 | logging.info("Successfully retrieved the file (jpg) list from the GCS path") 37 | 38 | # 2. write the list of data to a local file in the format expected by Vertex AI Batch Prediction 39 | with open(output_filename, "w", encoding="utf-8") as f: 40 | f.writelines("%s\n" % img_path for img_path in img_paths) 41 | logging.info( 42 | f"Successfully created the file list file ({output_filename}) in local storage" 43 | ) 44 | 45 | # 3. upload the local file to GCS location 46 | gcs_dst = f"{gcs_src_bucket}/{gcs_src_prefix}{output_filename}" 47 | tf.io.gfile.copy(output_filename, f"gs://{gcs_dst}", overwrite=True) 48 | logging.info(f"Successfully uploaded the file list ({gcs_dst})") 49 | 50 | # 4. store the GCS location where the local file is 51 | outpath.value = gcs_dst -------------------------------------------------------------------------------- /notebooks/README.md: -------------------------------------------------------------------------------- 1 | Notebook | Description | Colab Link 2 | --- | --- | --- | 3 | 01_Dataset_Prep.ipynb | Download the CIFAR10 TFRecords from TFDS (TensorFlow Datasets) and upload them to a GCS bucket. It makes sure the directory structure follows the `/span-{SPAN}/[train\|val]/*.tfrecord` format. It is primarily needed for the initial model training as shown in the `02_TFX_Training_Pipeline.ipynb` notebook. | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://colab.research.google.com/github/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/01_Dataset_Prep.ipynb) 4 | 02_TFX_Training_Pipeline.ipynb | Build a TFX pipeline that can be run on Vertex AI Pipelines. `ExampleGen`, `Trainer`, and `Pusher` components are included. The trained model is also deployed on Vertex AI and can be consumed by developers via API calls. | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://colab.research.google.com/github/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/02_TFX_Training_Pipeline.ipynb) 5 | 03_Batch_Prediction_Pipeline.ipynb | Build a KFP pipeline that runs batch prediction with the trained model obtained from `02_TFX_Training_Pipeline.ipynb`.
Then the `TFX Training Pipeline` gets triggered based on the evaluation of the predicted results. | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://colab.research.google.com/github/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/03_Batch_Prediction_Pipeline.ipynb) 6 | 04_Cloud_Scheduler_Trigger.ipynb | Create/publish a Pub/Sub topic, and deploy a Cloud Function listening to the Pub/Sub topic to trigger the batch prediction pipeline. | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://colab.research.google.com/github/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/04_Cloud_Scheduler_Trigger.ipynb) 7 | 97_Prepare_Test_Images.ipynb | Download test images from [Bing](https://www.bing.com/) to simulate data drift. Downloaded images will be moved into a GCS bucket. | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://colab.research.google.com/github/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/97_Prepare_Test_Images.ipynb) 8 | 98_Batch_Prediction_Test.ipynb | Make a batch prediction on a deployed model via Vertex AI Prediction. It measures the model performance (accuracy) on new data. | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://colab.research.google.com/github/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/98_Batch_Prediction_Test.ipynb) 9 | -------------------------------------------------------------------------------- /custom_components/training_pipeline_trigger.py: -------------------------------------------------------------------------------- 1 | """ 2 | Component responsible for triggering a training job given a pipeline specification. 3 | """ 4 | 5 | import json 6 | 7 | from google.cloud import storage 8 | 9 | from kfp.v2.google.client import AIPlatformClient 10 | from tfx.dsl.component.experimental.annotations import Parameter, InputArtifact 11 | from tfx.dsl.component.experimental.decorators import component 12 | from tfx.types.experimental.simple_artifacts import Dataset 13 | 14 | from absl import logging 15 | 16 | 17 | @component 18 | def PipelineTrigger( 19 | is_retrain: InputArtifact[Dataset], 20 | latest_span_id: InputArtifact[Dataset], 21 | pipeline_spec_path: Parameter[str], 22 | project_id: Parameter[str], 23 | region: Parameter[str], 24 | ): 25 | """ 26 | :param is_retrain: Boolean to indicate if we are retraining. 27 | :param latest_span_id: Latest span id to craft training data for the model. 28 | :param pipeline_spec_path: Training pipeline specification path. 29 | :param project_id: GCP project id. 30 | :param region: GCP region. 31 | """ 32 | if is_retrain.get_string_custom_property("result") == "False": 33 | # Check if the pipeline spec exists. 34 | storage_client = storage.Client() 35 | 36 | path_parts = pipeline_spec_path.replace("gs://", "").split("/") 37 | bucket_name = path_parts[0] 38 | blob_name = "/".join(path_parts[1:]) 39 | 40 | bucket = storage_client.bucket(bucket_name) 41 | blob = storage.Blob(bucket=bucket, name=blob_name) 42 | 43 | if not blob.exists(storage_client): 44 | raise ValueError(f"{pipeline_spec_path} does not exist.") 45 | 46 | # Initialize Vertex AI API client and submit for pipeline execution. 47 | api_client = AIPlatformClient(project_id=project_id, region=region) 48 | 49 | # Fetch the latest span.
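# (Note: the input-config patterns below use a glob character class, e.g.
# when latest_span is 5, "span-[45]/train/*.tfrecord" matches both span-4
# and span-5, so training uses the two most recent data spans; this
# reading assumes single-digit span ids.)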
50 | latest_span = latest_span_id.get_string_custom_property("latest_span") 51 | 52 | # Create a training job from pipeline spec. 53 | response = api_client.create_run_from_job_spec( 54 | pipeline_spec_path, 55 | enable_caching=False, 56 | parameter_values={ 57 | "input-config": json.dumps( 58 | { 59 | "splits": [ 60 | { 61 | "name": "train", 62 | "pattern": f"span-[{int(latest_span)-1}{latest_span}]/train/*.tfrecord", 63 | }, 64 | { 65 | "name": "val", 66 | "pattern": f"span-[{int(latest_span)-1}{latest_span}]/test/*.tfrecord", 67 | }, 68 | ] 69 | } 70 | ), 71 | "output-config": json.dumps({}), 72 | }, 73 | ) 74 | logging.info(response) 75 | -------------------------------------------------------------------------------- /custom_components/batch_pred_evaluator.py: -------------------------------------------------------------------------------- 1 | """ 2 | This component evaluates the performance of a currently deployed model, and 3 | the evaluation is based on the result of batch prediction on Vertex AI from the previous component. 4 | At the end, this component will output true or false to indicate if retraining is needed. 5 | Reference: https://bit.ly/vertex-batch 6 | """ 7 | 8 | from tfx.dsl.component.experimental.annotations import Parameter, OutputArtifact 9 | from tfx.dsl.component.experimental.decorators import component 10 | from tfx.types.experimental.simple_artifacts import Dataset 11 | 12 | from absl import logging 13 | import os 14 | import json 15 | 16 | 17 | @component 18 | def PerformanceEvaluator( 19 | gcs_destination: Parameter[str], 20 | local_directory: Parameter[str], 21 | threshold: Parameter[float], 22 | trigger_pipeline: OutputArtifact[Dataset], 23 | ): 24 | """ 25 | gcs_destination: GCS location where the files containing 26 | the result of batch prediction are 27 | local_directory: Temporary directory to hold files copied 28 | from the gcs_destination 29 | threshold: threshold to decide if retraining is needed or not; 30 | it is based on the measured accuracy 31 | trigger_pipeline: an output artifact which holds true or false 32 | to indicate if retraining is needed or not 33 | """ 34 | 35 | full_gcs_results_dir = f"{gcs_destination}/{local_directory}" 36 | 37 | # Create missing directories. 38 | os.makedirs(local_directory, exist_ok=True) 39 | 40 | # Get the Cloud Storage paths for each result. 41 | os.system(f"gsutil -m cp -r {full_gcs_results_dir} {local_directory}") 42 | 43 | # Get most recently modified directory. 44 | latest_directory = max( 45 | [os.path.join(local_directory, d) for d in os.listdir(local_directory)], 46 | key=os.path.getmtime, 47 | ) 48 | 49 | # Get downloaded results in directory. 50 | results_files = [] 51 | for dirpath, subdirs, files in os.walk(latest_directory): 52 | for file in files: 53 | if file.startswith("prediction.results"): 54 | results_files.append(os.path.join(dirpath, file)) 55 | 56 | # Consolidate all the results into a list. 57 | results = [] 58 | for results_file in results_files: 59 | # Download each result. 60 | with open(results_file, "r") as file: 61 | results.extend([json.loads(line) for line in file.readlines()]) 62 | 63 | # Calculate performance. 64 | num_correct = 0 65 | 66 | for result in results: 67 | label = os.path.basename(result["instance"]).split("_")[0] 68 | prediction = result["prediction"]["label"] 69 | 70 | if label == prediction: 71 | num_correct = num_correct + 1 72 | 73 | accuracy = num_correct / len(results) 74 | logging.info(f"Accuracy: {accuracy*100}%") 75 | 76 | # Store the boolean result.
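# ("result" is serialized as the string "True"/"False". Downstream
# components (SpanPreparator, PipelineTrigger) proceed with data
# preparation and retraining only when it is "False", i.e. when the
# measured accuracy fell below the threshold.)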
77 | trigger_pipeline.set_string_custom_property("result", str(accuracy >= threshold)) -------------------------------------------------------------------------------- /custom_components/batch_prediction_vertex.py: -------------------------------------------------------------------------------- 1 | """ 2 | This component launches a Batch Prediction job on Vertex AI. 3 | To know more about Vertex AI Batch Prediction jobs, go here: 4 | https://cloud.google.com/vertex-ai/docs/predictions/batch-predictions. 5 | """ 6 | 7 | from google.cloud import storage 8 | 9 | from tfx.dsl.component.experimental.annotations import Parameter, InputArtifact 10 | from tfx.dsl.component.experimental.decorators import component 11 | from tfx.types.standard_artifacts import String 12 | import google.cloud.aiplatform as vertex_ai 13 | 14 | from absl import logging 15 | 16 | 17 | @component 18 | def BatchPredictionGen( 19 | gcs_source: InputArtifact[String], 20 | project: Parameter[str], 21 | location: Parameter[str], 22 | model_resource_name: Parameter[str], 23 | job_display_name: Parameter[str], 24 | gcs_destination: Parameter[str], 25 | instances_format: Parameter[str] = "file-list", 26 | machine_type: Parameter[str] = "n1-standard-2", 27 | accelerator_count: Parameter[int] = 0, 28 | accelerator_type: Parameter[str] = None, 29 | starting_replica_count: Parameter[int] = 1, 30 | max_replica_count: Parameter[int] = 1, 31 | ): 32 | """ 33 | gcs_source: A location inside GCS to be used by the Batch Prediction job to get its inputs. 34 | The rest of the parameters are explained here: https://git.io/JiUyU. 35 | """ 36 | storage_client = storage.Client() 37 | 38 | # Read GCS Source (gcs_source contains the full path of the GCS object). 39 | # 1-1. get the bucket name from gcs_source 40 | gcs_source_uri = gcs_source.uri.split("//")[1:][0].split("/") 41 | bucketname = gcs_source_uri[0] 42 | bucket = storage_client.get_bucket(bucketname) 43 | logging.info(f"bucketname: {bucketname}") 44 | 45 | # 1-2. get the object path without the bucket name. 46 | objectpath = "/".join(gcs_source_uri[1:]) 47 | 48 | # 1-3. read the object to get the value set by OutputArtifact from FileListGen. 49 | blob = bucket.blob(objectpath) 50 | logging.info(f"objectpath: {objectpath}") 51 | 52 | gcs_source = f"gs://{blob.download_as_text()}" 53 | 54 | # Get Model. 55 | vertex_ai.init(project=project, location=location) 56 | model = vertex_ai.Model.list( 57 | filter=f"display_name={model_resource_name}", order_by="update_time" 58 | )[-1] 59 | 60 | # Launch a Batch Prediction job.
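# (sync=True below makes the call block until the job finishes, so the
# downstream PerformanceEvaluator only runs after the prediction.results-*
# files have been written under gcs_destination.)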
61 | logging.info("Starting batch prediction job.") 62 | logging.info(f"GCS path where file list is: {gcs_source}") 63 | batch_prediction_job = model.batch_predict( 64 | job_display_name=job_display_name, 65 | instances_format=instances_format, 66 | gcs_source=gcs_source, 67 | gcs_destination_prefix=gcs_destination, 68 | machine_type=machine_type, 69 | accelerator_count=accelerator_count, 70 | accelerator_type=accelerator_type, 71 | starting_replica_count=starting_replica_count, 72 | max_replica_count=max_replica_count, 73 | sync=True, 74 | ) 75 | 76 | logging.info(batch_prediction_job.display_name) 77 | logging.info(batch_prediction_job.resource_name) 78 | logging.info(batch_prediction_job.state) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Continuous Adaptation for Machine Learning System to Data Changes ([#TFCommunitySpotlight Awarded](https://twitter.com/TensorFlow/status/1469019016782041095?s=20)) 2 | 3 | _By [Chansung Park](https://github.com/deep-diver) and [Sayak Paul](https://github.com/sayakpaul)_ 4 | 5 | ![](figures/overview.jpeg) 6 | 7 | An MLOps system evolves in response to changes in the world, which are usually caused by [data/concept drift](https://en.wikipedia.org/wiki/Concept_drift). This project shows how to combine two separate pipelines, one for batch prediction and the other for training, to adapt to data changes. We worked with the TFX team to author a blog post detailing our approach. The blog post is available here: https://blog.tensorflow.org/2021/12/continuous-adaptation-for-machine.html. 8 | 9 | We assume the reader is familiar with basic MLOps concepts (like pipelines, data drift, batch predictions, etc.), TensorFlow, TensorFlow Extended, and Vertex AI. 10 | 11 | An MLOps system can also evolve when a much better algorithm (i.e., a state-of-the-art model) comes out. In that case, the system should adopt the better algorithm to understand the existing data better. We have demonstrated such workflows in the following projects: 12 | 13 | * Model Training as a CI/CD System Part 1: Reflect changes in the codebase to the MLOps pipeline: [Code on GitHub](https://github.com/deep-diver/Model-Training-as-a-CI-CD-System), [Article on the GCP blog](https://cloud.google.com/blog/topics/developers-practitioners/model-training-cicd-system-part-i) 14 | * Model Training as a CI/CD System Part 2: Trigger, schedule, and run MLOps pipelines: [Code on GitHub](https://github.com/sayakpaul/CI-CD-for-Model-Training), [Article on the GCP blog](https://cloud.google.com/blog/topics/developers-practitioners/model-training-cicd-system-part-ii) 15 | 16 | ## Workflow 17 | 18 | 1. Run the initial training pipeline to train an image classifier and deploy it using TensorFlow, TFX, and Vertex AI ([`02_TFX_Training_Pipeline.ipynb`](https://github.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/02_TFX_Training_Pipeline.ipynb)). 19 | 2. Download and prepare images from Bing search to simulate the data drift ([`97_Prepare_Test_Images.ipynb`](https://github.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/97_Prepare_Test_Images.ipynb)). 20 | 3. 
Generate the batch prediction pipeline specification (JSON) ([`03_Batch_Prediction_Pipeline.ipynb`](https://github.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/03_Batch_Prediction_Pipeline.ipynb)). 21 | 4. Deploy a Cloud Function that watches whether enough samples have been collected to run the batch prediction pipeline, and triggers that pipeline ([`04_Cloud_Scheduler_Trigger.ipynb`](https://github.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/04_Cloud_Scheduler_Trigger.ipynb)). 22 | 5. Schedule a periodic job to run the deployed Cloud Function ([`04_Cloud_Scheduler_Trigger.ipynb`](https://github.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/04_Cloud_Scheduler_Trigger.ipynb)). 23 | 24 | ## Custom components 25 | 26 | We developed several custom components in TFX for this project. You can find them under the [`custom_components`](https://github.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/tree/main/custom_components) directory. 27 | 28 | ## Checklist 29 | 30 | - [X] Initial Data Preparation (CIFAR10) 31 | - [X] Build Training Pipeline 32 | - [X] Build Batch Prediction Pipeline 33 | - [X] FileListGen component 34 | - [X] BatchPredictionGen component 35 | - [X] PerformanceEvaluator component 36 | - [X] SpanPreparator component 37 | - [X] PipelineTrigger component 38 | - [X] Data Preparation for Data/Concept Drift Simulation (from Bing) 39 | - [X] Deploy Cloud Function, Schedule a Job to Trigger the Cloud Function 40 | - [X] End to End Test 41 | 42 | ## Feedback 43 | 44 | We welcome feedback. Please create an issue to let us know what you think. 45 | 46 | ## Acknowledgements 47 | 48 | * [ML-GDE program](https://developers.google.com/programs/experts/) for providing GCP credits. 49 | * Robert Crowe and Jiayi Zhao of Google for helping us with our technical doubts. 50 | -------------------------------------------------------------------------------- /custom_components/span_preparator.py: -------------------------------------------------------------------------------- 1 | """ 2 | This component is responsible for separating provided samples into training and 3 | validation splits. It then converts them to TFRecords and stores those inside 4 | a GCS location. Finally, it returns the latest span id calculated from the current 5 | samples in `gcs_source_bucket`. 6 | """ 7 | 8 | from tfx.dsl.component.experimental.decorators import component 9 | from tfx.dsl.component.experimental.annotations import Parameter 10 | from tfx.dsl.component.experimental.annotations import OutputArtifact, InputArtifact 11 | from tfx.types.experimental.simple_artifacts import Dataset 12 | from absl import logging 13 | 14 | from datetime import datetime 15 | import tensorflow as tf 16 | import random 17 | import os 18 | 19 | 20 | # Label-mapping. 21 | LABEL_DICT = { 22 | "airplane": 0, 23 | "automobile": 1, 24 | "bird": 2, 25 | "cat": 3, 26 | "deer": 4, 27 | "dog": 5, 28 | "frog": 6, 29 | "horse": 7, 30 | "ship": 8, 31 | "truck": 9, 32 | } 33 | 34 | 35 | # Images are byte-strings. 36 | def _bytestring_feature(list_of_bytestrings): 37 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=list_of_bytestrings)) 38 | 39 | 40 | # Classes would be integers.
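# (Aside: these helpers wrap raw values into tf.train.Feature protos. To read
# the records back, e.g. in the training pipeline, a matching parse spec would
# look like {"image": tf.io.FixedLenFeature([], tf.string), "label":
# tf.io.FixedLenFeature([], tf.int64)} passed to tf.io.parse_single_example;
# this spec is an illustrative sketch, not code from this repository.)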
41 | def _int_feature(list_of_ints): 42 | return tf.train.Feature(int64_list=tf.train.Int64List(value=list_of_ints)) 43 | 44 | 45 | # Function that prepares a record for the TFRecord file; 46 | # a record contains the image and its label. 47 | def to_tfrecord(img_bytes, label): 48 | feature = { 49 | "image": _bytestring_feature([img_bytes]), 50 | "label": _int_feature([label]), 51 | } 52 | return tf.train.Example(features=tf.train.Features(feature=feature)) 53 | 54 | 55 | def write_tfrecords(filepaths, dest_gcs, tfrecord_filename, new_span, is_train): 56 | # For this project, we are serializing the images in one TFRecord only. 57 | # For more realistic purposes, this should be sharded. 58 | folder = "train" if is_train else "test" 59 | 60 | with tf.io.TFRecordWriter(tfrecord_filename) as writer: 61 | for path in filepaths: 62 | image_string = tf.io.read_file(path).numpy() 63 | class_name = path.split("/")[-1].split("_")[0] 64 | label = LABEL_DICT[class_name] 65 | 66 | example = to_tfrecord(image_string, label) 67 | writer.write(example.SerializeToString()) 68 | 69 | # Copy over the TFRecord file to the GCS Bucket and 70 | # remove the temporary file. 71 | logging.info(f"gsutil cp {tfrecord_filename} {dest_gcs}/span-{new_span}/{folder}/") 72 | os.system(f"gsutil cp {tfrecord_filename} {dest_gcs}/span-{new_span}/{folder}/") 73 | os.remove(tfrecord_filename) 74 | 75 | 76 | @component 77 | def SpanPreparator( 78 | is_retrain: InputArtifact[Dataset], 79 | gcs_source_bucket: Parameter[str], 80 | gcs_destination_bucket: Parameter[str], 81 | latest_span_id: OutputArtifact[Dataset], 82 | gcs_source_prefix: Parameter[str] = "", 83 | ): 84 | """ 85 | :param is_retrain: Boolean to indicate if we are retraining. 86 | :param gcs_source_bucket: GCS location where the entry samples are residing. 87 | :param gcs_destination_bucket: GCS location where the converted TFRecords will be serialized. 88 | :param latest_span_id: Output artifact holding the new data span id. 89 | :param gcs_source_prefix: Location prefix. 90 | """ 91 | if is_retrain.get_string_custom_property("result") == "False": 92 | # Get the latest span and determine the new span. 93 | last_span_str = tf.io.gfile.glob(f"{gcs_destination_bucket}/span-*")[-1] 94 | last_span = int(last_span_str.split("-")[-1]) 95 | new_span = last_span + 1 96 | 97 | timestamp = datetime.utcnow().strftime("%y%m%d-%H%M%S") 98 | 99 | # Get images from the provided GCS source. 100 | image_paths = tf.io.gfile.glob(f"gs://{gcs_source_bucket}/*.jpg") 101 | logging.info(image_paths) 102 | random.shuffle(image_paths) 103 | 104 | # Create train and validation splits. 105 | val_split = 0.2 106 | split_index = int(len(image_paths) * (1 - val_split)) 107 | training_paths = image_paths[:split_index] 108 | validation_paths = image_paths[split_index:] 109 | 110 | # Write as TFRecords.
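# (The two calls below write the TFRecords under
# {gcs_destination_bucket}/span-<new_span>/train/ and .../test/, which is
# the span-[..]/train|test/*.tfrecord layout that PipelineTrigger passes
# to the training pipeline's ExampleGen.)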
111 | write_tfrecords( 112 | training_paths, 113 | gcs_destination_bucket, 114 | tfrecord_filename=f"new_training_data_{timestamp}.tfrecord", 115 | new_span=new_span, 116 | is_train=True, 117 | ) 118 | write_tfrecords( 119 | validation_paths, 120 | gcs_destination_bucket, 121 | tfrecord_filename=f"new_validation_data_{timestamp}.tfrecord", 122 | new_span=new_span, 123 | is_train=False, 124 | ) 125 | 126 | logging.info("Removing images from batch prediction bucket.") 127 | os.system( 128 | f"gsutil mv gs://{gcs_source_bucket}/{gcs_source_prefix} gs://{gcs_source_bucket}/{gcs_source_prefix}_old" 129 | ) 130 | latest_span_id.set_string_custom_property("latest_span", str(new_span)) 131 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /notebooks/98_Batch_Prediction_Test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "view-in-github" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "id": "msGXve8btxnH" 17 | }, 18 | "source": [ 19 | "## Outline\n", 20 | "1. 
Upload the data to the designated GCS bucket\n", 21 | " - The data is stored in a GCS bucket to simulate a real-world scenario. In reality, data is collected in a central location (i.e., a GCS bucket), and it will be used to measure the model performance. We can measure the model performance much more reliably on a batch of data than on a single sample (online)\n", 22 | "2. Perform batch prediction\n", 23 | "3. Measure the model performance (accuracy) on the data " 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Setup" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 21, 36 | "metadata": { 37 | "id": "heCy5KIqGmN5" 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "!pip install -q --upgrade google-cloud-aiplatform\n", 42 | "!pip install -q --upgrade google-cloud-storage" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "colab": { 50 | "base_uri": "https://localhost:8080/" 51 | }, 52 | "id": "P28WFWH2GpwG", 53 | "outputId": "722815da-315d-4e1e-dc4b-b121ec397f8a" 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "!gcloud init" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 44, 63 | "metadata": { 64 | "id": "6v_P4FAjGuGf" 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "from google.colab import auth\n", 69 | "\n", 70 | "auth.authenticate_user()" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "## Set Environment Values for GCP" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 57, 83 | "metadata": { 84 | "id": "2Y4ZMVcLHHkX" 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "GOOGLE_CLOUD_PROJECT = \"central-hangar-321813\" # @param {type:\"string\"}\n", 89 | "GOOGLE_CLOUD_REGION = \"us-central1\" # @param {type:\"string\"}\n", 90 | "\n", 91 | "MODEL_NAME = \"resnet_cifar_latest\" # @param {type:\"string\"}\n", 92 | "\n", 93 | "TEST_FILENAME = \"test-images.txt\" # @param {type:\"string\"}\n", 94 | "TEST_GCS_BUCKET = \"gs://batch-prediction-collection\" # @param {type:\"string\"}\n", 95 | "TEST_LOCAL_PATH = \"Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images\" # @param {type:\"string\"}" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "## Clone the Repository to Obtain Test Images\n", 103 | "- There are only 10 image files for simple testing purposes" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 12, 109 | "metadata": { 110 | "colab": { 111 | "base_uri": "https://localhost:8080/" 112 | }, 113 | "id": "KHcBvji7Rbrs", 114 | "outputId": "31cefc83-90c1-4568-87b3-66e1bfba6384" 115 | }, 116 | "outputs": [ 117 | { 118 | "name": "stdout", 119 | "output_type": "stream", 120 | "text": [ 121 | "Cloning into 'Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes'...\n", 122 | "remote: Enumerating objects: 100, done.\u001b[K\n", 123 | "remote: Counting objects: 100% (100/100), done.\u001b[K\n", 124 | "remote: Compressing objects: 100% (78/78), done.\u001b[K\n", 125 | "remote: Total 100 (delta 59), reused 38 (delta 21), pack-reused 0\u001b[K\n", 126 | "Receiving objects: 100% (100/100), 57.61 KiB | 14.40 MiB/s, done.\n", 127 | "Resolving deltas: 100% (59/59), done.\n" 128 | ] 129 | } 130 | ], 131 | "source": [ 132 | "!git clone https://github.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes.git" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code",
137 | "execution_count": 58, 138 | "metadata": { 139 | "colab": { 140 | "base_uri": "https://localhost:8080/" 141 | }, 142 | "id": "GDexvxTXRfWH", 143 | "outputId": "2bd5f4d9-588d-414e-e819-0f8969034e29" 144 | }, 145 | "outputs": [ 146 | { 147 | "data": { 148 | "text/plain": [ 149 | "['frog_0000.jpg',\n", 150 | " 'truck_0000.jpg',\n", 151 | " 'dog_0000.jpg',\n", 152 | " 'cat_0000.jpg',\n", 153 | " 'ship_0000.jpg',\n", 154 | " 'deer_0000.jpg',\n", 155 | " 'bird_0000.jpg',\n", 156 | " 'horse_0000.jpg',\n", 157 | " 'automobile_0000.jpg',\n", 158 | " 'airplane_0000.jpg']" 159 | ] 160 | }, 161 | "execution_count": 58, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "from os import listdir\n", 168 | "\n", 169 | "test_files = listdir(TEST_LOCAL_PATH)\n", 170 | "test_files" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "## Create Import File to be Injected into Batch Prediction\n", 178 | "- Batch request input should follow a certain format in Vertex AI Prediction. JSONL, TFRecord, CSV, file list formats are available([link](https://cloud.google.com/vertex-ai/docs/predictions/batch-predictions#batch_request_input)), and file list format is used in this notebook" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 64, 184 | "metadata": { 185 | "id": "OJR9wy6GXyJv" 186 | }, 187 | "outputs": [], 188 | "source": [ 189 | "f = open(TEST_FILENAME, \"w\")\n", 190 | "\n", 191 | "for filename in test_files:\n", 192 | " f.write(f\"{TEST_GCS_BUCKET}/{filename}\\n\")\n", 193 | "\n", 194 | "f.close()" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 65, 200 | "metadata": { 201 | "colab": { 202 | "base_uri": "https://localhost:8080/" 203 | }, 204 | "id": "mmRePB6aYRfR", 205 | "outputId": "2ea2566d-a1a0-4dae-c7ef-6152ea6f79cc" 206 | }, 207 | "outputs": [ 208 | { 209 | "name": "stdout", 210 | "output_type": "stream", 211 | "text": [ 212 | "gs://batch-prediction-collection/frog_0000.jpg\n", 213 | "gs://batch-prediction-collection/truck_0000.jpg\n", 214 | "gs://batch-prediction-collection/dog_0000.jpg\n", 215 | "gs://batch-prediction-collection/cat_0000.jpg\n", 216 | "gs://batch-prediction-collection/ship_0000.jpg\n", 217 | "gs://batch-prediction-collection/deer_0000.jpg\n", 218 | "gs://batch-prediction-collection/bird_0000.jpg\n", 219 | "gs://batch-prediction-collection/horse_0000.jpg\n", 220 | "gs://batch-prediction-collection/automobile_0000.jpg\n", 221 | "gs://batch-prediction-collection/airplane_0000.jpg\n" 222 | ] 223 | } 224 | ], 225 | "source": [ 226 | "!cat {TEST_FILENAME}" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "## Copy Test Images and Import File to GCS Bucket" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 66, 239 | "metadata": { 240 | "colab": { 241 | "base_uri": "https://localhost:8080/" 242 | }, 243 | "id": "WdeuJt5YYVPw", 244 | "outputId": "d34b6904-a954-4a25-ba64-58d560f063f3" 245 | }, 246 | "outputs": [ 247 | { 248 | "name": "stdout", 249 | "output_type": "stream", 250 | "text": [ 251 | "Copying file://test-images.txt [Content-Type=text/plain]...\n", 252 | "/ [0/1 files][ 0.0 B/ 480.0 B] 0% Done \r", 253 | "/ [1/1 files][ 480.0 B/ 480.0 B] 100% Done \r\n", 254 | "Operation completed over 1 objects/480.0 B. 
\n", 255 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/dog_0000.jpg [Content-Type=image/jpeg]...\n", 256 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/automobile_0000.jpg [Content-Type=image/jpeg]...\n", 257 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/cat_0000.jpg [Content-Type=image/jpeg]...\n", 258 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/frog_0000.jpg [Content-Type=image/jpeg]...\n", 259 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/bird_0000.jpg [Content-Type=image/jpeg]...\n", 260 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/airplane_0000.jpg [Content-Type=image/jpeg]...\n", 261 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/horse_0000.jpg [Content-Type=image/jpeg]...\n", 262 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/deer_0000.jpg [Content-Type=image/jpeg]...\n", 263 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/truck_0000.jpg [Content-Type=image/jpeg]...\n", 264 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/ship_0000.jpg [Content-Type=image/jpeg]...\n", 265 | "/ [10/10 files][ 9.2 KiB/ 9.2 KiB] 100% Done \n", 266 | "Operation completed over 10 objects/9.2 KiB. \n" 267 | ] 268 | } 269 | ], 270 | "source": [ 271 | "!gsutil -m cp -r {TEST_FILENAME} {TEST_GCS_BUCKET}\n", 272 | "!gsutil -m cp -r {TEST_LOCAL_PATH}/*.jpg {TEST_GCS_BUCKET}" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": { 278 | "id": "-mVFSRUQHdEj" 279 | }, 280 | "source": [ 281 | "## Batch Prediction" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 68, 287 | "metadata": { 288 | "id": "z_0QXPfcHc3p" 289 | }, 290 | "outputs": [], 291 | "source": [ 292 | "import google.cloud.aiplatform as aiplatform\n", 293 | "from typing import Union, Sequence\n", 294 | "\n", 295 | "\n", 296 | "def create_batch_prediction_job_dedicated_resources_sample(\n", 297 | " project: str,\n", 298 | " location: str,\n", 299 | " model_resource_name: str,\n", 300 | " job_display_name: str,\n", 301 | " gcs_source: Union[str, Sequence[str]],\n", 302 | " gcs_destination: str,\n", 303 | " instances_format: str = \"file-list\",\n", 304 | " machine_type: str = \"n1-standard-2\",\n", 305 | " accelerator_count: int = 1,\n", 306 | " accelerator_type: str = \"NVIDIA_TESLA_K80\",\n", 307 | " starting_replica_count: int = 1,\n", 308 | " max_replica_count: int = 1,\n", 309 | " sync: bool = True,\n", 310 | "):\n", 311 | " aiplatform.init(project=project, location=location)\n", 312 | "\n", 313 | " my_model = aiplatform.Model(model_resource_name)\n", 314 | "\n", 315 | " batch_prediction_job = my_model.batch_predict(\n", 316 | " job_display_name=job_display_name,\n", 317 | " instances_format=instances_format,\n", 318 | " gcs_source=gcs_source,\n", 319 | " gcs_destination_prefix=gcs_destination,\n", 320 | " machine_type=machine_type,\n", 321 | " accelerator_count=accelerator_count,\n", 322 | " accelerator_type=accelerator_type,\n", 323 | " starting_replica_count=starting_replica_count,\n", 324 | " 
max_replica_count=max_replica_count,\n", 325 | " sync=sync,\n", 326 | " )\n", 327 | "\n", 328 | " batch_prediction_job.wait()\n", 329 | "\n", 330 | " print(batch_prediction_job.display_name)\n", 331 | " print(batch_prediction_job.resource_name)\n", 332 | " print(batch_prediction_job.state)\n", 333 | " return batch_prediction_job" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 69, 339 | "metadata": { 340 | "id": "BkAJqucqaw_r" 341 | }, 342 | "outputs": [], 343 | "source": [ 344 | "from datetime import datetime\n", 345 | "\n", 346 | "TIMESTAMP = datetime.now().strftime(\"%Y%m%d%H%M%S\")" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": 70, 352 | "metadata": { 353 | "colab": { 354 | "base_uri": "https://localhost:8080/" 355 | }, 356 | "id": "Hb1g6qdNLTfP", 357 | "outputId": "442d7ada-b10f-456b-b2e4-7ccb7df86e5a" 358 | }, 359 | "outputs": [ 360 | { 361 | "name": "stdout", 362 | "output_type": "stream", 363 | "text": [ 364 | "INFO:google.cloud.aiplatform.jobs:Creating BatchPredictionJob\n", 365 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob created. Resource name: projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104\n", 366 | "INFO:google.cloud.aiplatform.jobs:To use this BatchPredictionJob in another session:\n", 367 | "INFO:google.cloud.aiplatform.jobs:bpj = aiplatform.BatchPredictionJob('projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104')\n", 368 | "INFO:google.cloud.aiplatform.jobs:View Batch Prediction Job:\n", 369 | "https://console.cloud.google.com/ai/platform/locations/us-central1/batch-predictions/1680882799009071104?project=31482268105\n", 370 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104 current state:\n", 371 | "JobState.JOB_STATE_RUNNING\n", 372 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104 current state:\n", 373 | "JobState.JOB_STATE_RUNNING\n", 374 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104 current state:\n", 375 | "JobState.JOB_STATE_RUNNING\n", 376 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104 current state:\n", 377 | "JobState.JOB_STATE_RUNNING\n", 378 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104 current state:\n", 379 | "JobState.JOB_STATE_RUNNING\n", 380 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104 current state:\n", 381 | "JobState.JOB_STATE_RUNNING\n", 382 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104 current state:\n", 383 | "JobState.JOB_STATE_RUNNING\n", 384 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104 current state:\n", 385 | "JobState.JOB_STATE_RUNNING\n", 386 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104 current state:\n", 387 | "JobState.JOB_STATE_SUCCEEDED\n", 388 | 
"INFO:google.cloud.aiplatform.jobs:BatchPredictionJob run completed. Resource name: projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104\n", 389 | "resnet_cifar_latest-20210917022404\n", 390 | "projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104\n", 391 | "JobState.JOB_STATE_SUCCEEDED\n" 392 | ] 393 | }, 394 | { 395 | "data": { 396 | "text/plain": [ 397 | " \n", 398 | "resource name: projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104" 399 | ] 400 | }, 401 | "execution_count": 70, 402 | "metadata": {}, 403 | "output_type": "execute_result" 404 | } 405 | ], 406 | "source": [ 407 | "create_batch_prediction_job_dedicated_resources_sample(\n", 408 | " project=GOOGLE_CLOUD_PROJECT,\n", 409 | " location=GOOGLE_CLOUD_REGION,\n", 410 | " model_resource_name=\"2008244793993330688\",\n", 411 | " job_display_name=f\"{MODEL_NAME}-{TIMESTAMP}\",\n", 412 | " gcs_source=[f\"{TEST_GCS_BUCKET}/{TEST_FILENAME}\"],\n", 413 | " gcs_destination=f\"{TEST_GCS_BUCKET}/results/\",\n", 414 | " accelerator_type=None,\n", 415 | " accelerator_count=None,\n", 416 | ")" 417 | ] 418 | }, 419 | { 420 | "cell_type": "markdown", 421 | "metadata": {}, 422 | "source": [ 423 | "## Evaluate Batch Prediction" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 72, 429 | "metadata": { 430 | "colab": { 431 | "base_uri": "https://localhost:8080/" 432 | }, 433 | "id": "b9Hz7CVqbE5o", 434 | "outputId": "08df5dd9-79b1-458b-aa63-b393eee60238" 435 | }, 436 | "outputs": [ 437 | { 438 | "name": "stdout", 439 | "output_type": "stream", 440 | "text": [ 441 | "Copying gs://batch-prediction-collection/results/prediction-resnet_cifar_latest-2021_09_16T19_24_05_801Z/prediction.results-00000-of-00001...\n", 442 | "/ [0/6 files][ 0.0 B/ 1.3 KiB] 0% Done \r", 443 | "Copying gs://batch-prediction-collection/results/prediction-resnet_cifar_latest-2021_09_16T18_47_39_122Z/prediction.results-00000-of-00001...\n", 444 | "/ [0/6 files][ 0.0 B/ 1.3 KiB] 0% Done \r", 445 | "Copying gs://batch-prediction-collection/results/prediction-resnet_cifar_latest-2021_09_16T18_47_39_122Z/prediction.errors_stats-00000-of-00001...\n", 446 | "/ [0/6 files][ 0.0 B/ 1.3 KiB] 0% Done \r", 447 | "Copying gs://batch-prediction-collection/results/prediction-resnet_cifar_latest-2021_09_16T19_23_55_003Z/prediction.errors_stats-00000-of-00001...\n", 448 | "/ [0/6 files][ 0.0 B/ 1.3 KiB] 0% Done \r", 449 | "/ [1/6 files][ 0.0 B/ 1.3 KiB] 0% Done \r", 450 | "Copying gs://batch-prediction-collection/results/prediction-resnet_cifar_latest-2021_09_16T19_23_55_003Z/prediction.results-00000-of-00001...\n", 451 | "Copying gs://batch-prediction-collection/results/prediction-resnet_cifar_latest-2021_09_16T19_24_05_801Z/prediction.errors_stats-00000-of-00001...\n", 452 | "/ [6/6 files][ 1.3 KiB/ 1.3 KiB] 100% Done \n", 453 | "Operation completed over 6 objects/1.3 KiB. 
\n" 454 | ] 455 | } 456 | ], 457 | "source": [ 458 | "import os\n", 459 | "import json\n", 460 | "\n", 461 | "RESULTS_DIRECTORY = \"results\"\n", 462 | "RESULTS_DIRECTORY_FULL = f'{TEST_GCS_BUCKET}/{RESULTS_DIRECTORY}'\n", 463 | "\n", 464 | "# Create missing directories\n", 465 | "os.makedirs(RESULTS_DIRECTORY, exist_ok=True)\n", 466 | "\n", 467 | "# Get the Cloud Storage paths for each result\n", 468 | "!gsutil -m cp -r $RESULTS_DIRECTORY_FULL $RESULTS_DIRECTORY\n", 469 | "\n", 470 | "# Get most recently modified directory\n", 471 | "latest_directory = max(\n", 472 | " [\n", 473 | " os.path.join(RESULTS_DIRECTORY, d)\n", 474 | " for d in os.listdir(RESULTS_DIRECTORY)\n", 475 | " ],\n", 476 | " key=os.path.getmtime,\n", 477 | ")\n", 478 | "\n", 479 | "# Get downloaded results in directory\n", 480 | "results_files = []\n", 481 | "for dirpath, subdirs, files in os.walk(latest_directory):\n", 482 | " for file in files:\n", 483 | " if file.startswith(\"prediction.results\"):\n", 484 | " results_files.append(os.path.join(dirpath, file))\n", 485 | "\n", 486 | "# Consolidate all the results into a list\n", 487 | "results = []\n", 488 | "for results_file in results_files:\n", 489 | " # Download each result\n", 490 | " with open(results_file, \"r\") as file:\n", 491 | " results.extend([json.loads(line) for line in file.readlines()])" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": 73, 497 | "metadata": { 498 | "colab": { 499 | "base_uri": "https://localhost:8080/" 500 | }, 501 | "id": "aCHYk8L0p6ED", 502 | "outputId": "ca9348c1-db4e-43e2-cf41-1a066d727389" 503 | }, 504 | "outputs": [ 505 | { 506 | "data": { 507 | "text/plain": [ 508 | "[{'instance': 'gs://batch-prediction-collection/airplane_0000.jpg',\n", 509 | " 'prediction': {'confidence': 0.635806859, 'label': 'ship'}},\n", 510 | " {'instance': 'gs://batch-prediction-collection/cat_0000.jpg',\n", 511 | " 'prediction': {'confidence': 0.514597297, 'label': 'cat'}},\n", 512 | " {'instance': 'gs://batch-prediction-collection/ship_0000.jpg',\n", 513 | " 'prediction': {'confidence': 0.944843113, 'label': 'ship'}},\n", 514 | " {'instance': 'gs://batch-prediction-collection/bird_0000.jpg',\n", 515 | " 'prediction': {'confidence': 0.710508406, 'label': 'horse'}},\n", 516 | " {'instance': 'gs://batch-prediction-collection/truck_0000.jpg',\n", 517 | " 'prediction': {'confidence': 0.980968714, 'label': 'truck'}},\n", 518 | " {'instance': 'gs://batch-prediction-collection/frog_0000.jpg',\n", 519 | " 'prediction': {'confidence': 0.696931422, 'label': 'frog'}},\n", 520 | " {'instance': 'gs://batch-prediction-collection/dog_0000.jpg',\n", 521 | " 'prediction': {'confidence': 0.382295936, 'label': 'cat'}},\n", 522 | " {'instance': 'gs://batch-prediction-collection/deer_0000.jpg',\n", 523 | " 'prediction': {'confidence': 0.437720776, 'label': 'dog'}},\n", 524 | " {'instance': 'gs://batch-prediction-collection/automobile_0000.jpg',\n", 525 | " 'prediction': {'confidence': 0.460335433, 'label': 'automobile'}},\n", 526 | " {'instance': 'gs://batch-prediction-collection/horse_0000.jpg',\n", 527 | " 'prediction': {'confidence': 0.918733776, 'label': 'dog'}}]" 528 | ] 529 | }, 530 | "execution_count": 73, 531 | "metadata": {}, 532 | "output_type": "execute_result" 533 | } 534 | ], 535 | "source": [ 536 | "results" 537 | ] 538 | }, 539 | { 540 | "cell_type": "code", 541 | "execution_count": 83, 542 | "metadata": { 543 | "colab": { 544 | "base_uri": "https://localhost:8080/" 545 | }, 546 | "id": "g-6UdwcKqDKO", 547 | "outputId": 
"4545ebf3-71fa-477f-8c51-1068a6d65226" 548 | }, 549 | "outputs": [ 550 | { 551 | "name": "stdout", 552 | "output_type": "stream", 553 | "text": [ 554 | "label(airplane)/prediction(ship)\n", 555 | "label(cat)/prediction(cat)\n", 556 | "label(ship)/prediction(ship)\n", 557 | "label(bird)/prediction(horse)\n", 558 | "label(truck)/prediction(truck)\n", 559 | "label(frog)/prediction(frog)\n", 560 | "label(dog)/prediction(cat)\n", 561 | "label(deer)/prediction(dog)\n", 562 | "label(automobile)/prediction(automobile)\n", 563 | "label(horse)/prediction(dog)\n", 564 | "\n", 565 | "number of results: 10\n", 566 | "number of correct: 5\n", 567 | "Accuracy: 0.5\n" 568 | ] 569 | } 570 | ], 571 | "source": [ 572 | "num_correct = 0\n", 573 | "\n", 574 | "for result in results:\n", 575 | " label = os.path.basename(result[\"instance\"]).split(\"_\")[0]\n", 576 | " prediction = result[\"prediction\"][\"label\"]\n", 577 | "\n", 578 | " print(f\"label({label})/prediction({prediction})\")\n", 579 | " if label == prediction:\n", 580 | " num_correct = num_correct + 1\n", 581 | "\n", 582 | "print()\n", 583 | "print(f\"number of results: {len(results)}\")\n", 584 | "print(f\"number of correct: {num_correct}\")\n", 585 | "print(f\"Accuracy: {num_correct/len(results)}\")" 586 | ] 587 | } 588 | ], 589 | "metadata": { 590 | "colab": { 591 | "include_colab_link": true, 592 | "name": "03_Batch_Prediction_Performance.ipynb", 593 | "provenance": [] 594 | }, 595 | "kernelspec": { 596 | "display_name": "Python 3 (ipykernel)", 597 | "language": "python", 598 | "name": "python3" 599 | }, 600 | "language_info": { 601 | "codemirror_mode": { 602 | "name": "ipython", 603 | "version": 3 604 | }, 605 | "file_extension": ".py", 606 | "mimetype": "text/x-python", 607 | "name": "python", 608 | "nbconvert_exporter": "python", 609 | "pygments_lexer": "ipython3", 610 | "version": "3.8.2" 611 | } 612 | }, 613 | "nbformat": 4, 614 | "nbformat_minor": 2 615 | } 616 | -------------------------------------------------------------------------------- /notebooks/04_Cloud_Scheduler_Trigger.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "view-in-github" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "id": "T_WNxUbMmgfw" 17 | }, 18 | "source": [ 19 | "# Outline" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": { 25 | "id": "V9JhU_tzmgfz" 26 | }, 27 | "source": [ 28 | "1. Create Pub/Sub Topic ([refer](https://github.com/sayakpaul/CI-CD-for-Model-Training/blob/main/cloud_function_trigger.ipynb))\n", 29 | "2. Deploy Cloud Function ([refer](https://github.com/sayakpaul/CI-CD-for-Model-Training/blob/main/cloud_function_trigger.ipynb))\n", 30 | " - check if there are enough number of images in a specific GCS bucket\n", 31 | "3. 
Publish Pub/Sub Topic to trigger batch prediction pipeline ([refer](https://github.com/sayakpaul/CI-CD-for-Model-Training/blob/main/cloud_scheduler_trigger.ipynb))\n", 32 | " - need pipeline JSON spec somewhere in GCS" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": { 38 | "id": "Iva2o8C-mujw" 39 | }, 40 | "source": [ 41 | "# Setup" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 1, 47 | "metadata": { 48 | "colab": { 49 | "base_uri": "https://localhost:8080/" 50 | }, 51 | "id": "2A7-ml0Yt6lX", 52 | "outputId": "8f368fdf-1a56-4440-91d3-0c766d5ed369" 53 | }, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "\u001b[?25l\r", 60 | "\u001b[K |███▌ | 10 kB 30.5 MB/s eta 0:00:01\r", 61 | "\u001b[K |███████ | 20 kB 21.4 MB/s eta 0:00:01\r", 62 | "\u001b[K |██████████▍ | 30 kB 16.3 MB/s eta 0:00:01\r", 63 | "\u001b[K |█████████████▉ | 40 kB 14.2 MB/s eta 0:00:01\r", 64 | "\u001b[K |█████████████████▍ | 51 kB 6.6 MB/s eta 0:00:01\r", 65 | "\u001b[K |████████████████████▉ | 61 kB 7.0 MB/s eta 0:00:01\r", 66 | "\u001b[K |████████████████████████▎ | 71 kB 7.5 MB/s eta 0:00:01\r", 67 | "\u001b[K |███████████████████████████▊ | 81 kB 8.4 MB/s eta 0:00:01\r", 68 | "\u001b[K |███████████████████████████████▏| 92 kB 8.6 MB/s eta 0:00:01\r", 69 | "\u001b[K |████████████████████████████████| 94 kB 2.9 MB/s \n", 70 | "\u001b[?25h\u001b[?25l\r", 71 | "\u001b[K |███████▍ | 10 kB 38.2 MB/s eta 0:00:01\r", 72 | "\u001b[K |██████████████▉ | 20 kB 43.9 MB/s eta 0:00:01\r", 73 | "\u001b[K |██████████████████████▎ | 30 kB 51.4 MB/s eta 0:00:01\r", 74 | "\u001b[K |█████████████████████████████▊ | 40 kB 54.8 MB/s eta 0:00:01\r", 75 | "\u001b[K |████████████████████████████████| 44 kB 2.9 MB/s \n", 76 | "\u001b[?25h" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "!pip install --upgrade -q google-cloud-scheduler" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": { 88 | "id": "qJLNWsPamwNw" 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "!gcloud init" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 3, 98 | "metadata": { 99 | "id": "TRkHpHnQmzof" 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "from google.colab import auth\n", 104 | "\n", 105 | "auth.authenticate_user()" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 43, 111 | "metadata": { 112 | "id": "WH4ZvM_3m397" 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "GOOGLE_CLOUD_PROJECT = \"gcp-ml-172005\" # @param {type:\"string\"}\n", 117 | "GOOGLE_CLOUD_REGION = \"us-central1\"\n", 118 | "\n", 119 | "GCS_BUCKET_NAME = \"cifar10-experimental-csp2\" # @param {type:\"string\"}\n", 120 | "PIPELINE_NAME = \"continuous-adaptation-for-data-changes-batch\" # @param {type:\"string\"}\n", 121 | "PIPELINE_ROOT = \"gs://{}/pipeline_root/{}\".format(GCS_BUCKET_NAME, PIPELINE_NAME)\n", 122 | "PIPELINE_LOCATION = f\"{PIPELINE_ROOT}/{PIPELINE_NAME}_pipeline.json\"\n", 123 | "\n", 124 | "PUBSUB_TOPIC = f\"trigger-{PIPELINE_NAME}\"\n", 125 | "\n", 126 | "SCHEDULER_JOB_NAME = f\"scheduler-job-{PUBSUB_TOPIC}\"\n", 127 | "\n", 128 | "IMAGE_LOCATION_BUCKET = \"batch-prediction-collection-3\" # @param {type:\"string\"}" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 44, 134 | "metadata": { 135 | "colab": { 136 | "base_uri": "https://localhost:8080/", 137 | "height": 35 138 | }, 139 | "id": "kpidhm309Ed9", 140 | "outputId": "fadb39ba-d84e-462d-803c-fce387229362" 
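Once the topic and Cloud Function below are in place, the trigger can also be smoke-tested by hand instead of waiting for the scheduler. A hedged sketch (not a cell from the original notebook; it assumes the `google-cloud-pubsub` client library, which is not installed above):

```python
# Assumed smoke test (not part of the original notebook): publish an empty
# message to the topic so the Cloud Function fires once, outside the scheduler.
# Requires `pip install google-cloud-pubsub`.
from google.cloud import pubsub_v1

publisher = pubsub_v1.PublisherClient()
topic_path = publisher.topic_path(GOOGLE_CLOUD_PROJECT, PUBSUB_TOPIC)

# The function only reacts to the publish event; the payload body is ignored.
future = publisher.publish(topic_path, b"", name="scheduler")
print(f"Published message ID: {future.result()}")
```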
141 | }, 142 | "outputs": [ 143 | { 144 | "data": { 145 | "application/vnd.google.colaboratory.intrinsic+json": { 146 | "type": "string" 147 | }, 148 | "text/plain": [ 149 | "'batch-prediction-collection-3'" 150 | ] 151 | }, 152 | "execution_count": 44, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "IMAGE_LOCATION_BUCKET" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": { 164 | "id": "3622c4BaodLT" 165 | }, 166 | "source": [ 167 | "# Create Pub/Sub Topic" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": { 174 | "colab": { 175 | "base_uri": "https://localhost:8080/" 176 | }, 177 | "id": "_1RNMdR-ofBn", 178 | "outputId": "b53dac52-ab2b-49ad-d92c-eec4782fad1b" 179 | }, 180 | "outputs": [ 181 | { 182 | "name": "stdout", 183 | "output_type": "stream", 184 | "text": [ 185 | "Created topic [projects/gcp-ml-172005/topics/trigger-continuous-adaptation-for-data-changes-batch].\n" 186 | ] 187 | } 188 | ], 189 | "source": [ 190 | "!gcloud pubsub topics create {PUBSUB_TOPIC}" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": { 196 | "id": "s2ISbBXvoiN7" 197 | }, 198 | "source": [ 199 | "# Deploy Cloud Function" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": { 205 | "id": "TeYmsiYAqroy" 206 | }, 207 | "source": [ 208 | "### Create Cloud Function Directory" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 6, 214 | "metadata": { 215 | "id": "y_hJOA47prkp" 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "!mkdir -p cloud_function\n", 220 | "!touch cloud_function/__init__.py" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": { 226 | "id": "p3gLM4SQqxMa" 227 | }, 228 | "source": [ 229 | "### Create Requirements.txt" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 7, 235 | "metadata": { 236 | "id": "o-7JN3huqdEO" 237 | }, 238 | "outputs": [], 239 | "source": [ 240 | "_cloud_function_dep = \"cloud_function/requirements.txt\"" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 8, 246 | "metadata": { 247 | "colab": { 248 | "base_uri": "https://localhost:8080/" 249 | }, 250 | "id": "W9a9myWFqj14", 251 | "outputId": "ce6ef3f5-29dc-4651-ee62-167fb95d4634" 252 | }, 253 | "outputs": [ 254 | { 255 | "name": "stdout", 256 | "output_type": "stream", 257 | "text": [ 258 | "Writing cloud_function/requirements.txt\n" 259 | ] 260 | } 261 | ], 262 | "source": [ 263 | "%%writefile {_cloud_function_dep}\n", 264 | "\n", 265 | "kfp==1.6.2\n", 266 | "google-cloud-aiplatform\n", 267 | "google-cloud-storage" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": { 273 | "id": "WhaDWKaRqzzH" 274 | }, 275 | "source": [ 276 | "### Create Cloud Function Module" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 9, 282 | "metadata": { 283 | "colab": { 284 | "base_uri": "https://localhost:8080/" 285 | }, 286 | "id": "npdLDFazZX0v", 287 | "outputId": "dead3d54-31ab-4588-bb98-ee1e6b5610cb" 288 | }, 289 | "outputs": [ 290 | { 291 | "name": "stdout", 292 | "output_type": "stream", 293 | "text": [ 294 | "Requirement already satisfied: google-cloud-storage in /usr/local/lib/python3.7/dist-packages (1.18.1)\n", 295 | "Requirement already satisfied: google-resumable-media<0.5.0dev,>=0.3.1 in /usr/local/lib/python3.7/dist-packages (from google-cloud-storage) (0.4.1)\n", 296 | "Requirement already satisfied: 
google-auth>=1.2.0 in /usr/local/lib/python3.7/dist-packages (from google-cloud-storage) (1.35.0)\n", 297 | "Requirement already satisfied: google-cloud-core<2.0dev,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from google-cloud-storage) (1.0.3)\n", 298 | "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.7/dist-packages (from google-auth>=1.2.0->google-cloud-storage) (4.7.2)\n", 299 | "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.7/dist-packages (from google-auth>=1.2.0->google-cloud-storage) (1.15.0)\n", 300 | "Requirement already satisfied: setuptools>=40.3.0 in /usr/local/lib/python3.7/dist-packages (from google-auth>=1.2.0->google-cloud-storage) (57.4.0)\n", 301 | "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth>=1.2.0->google-cloud-storage) (4.2.4)\n", 302 | "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth>=1.2.0->google-cloud-storage) (0.2.8)\n", 303 | "Requirement already satisfied: google-api-core<2.0.0dev,>=1.14.0 in /usr/local/lib/python3.7/dist-packages (from google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (1.26.3)\n", 304 | "Requirement already satisfied: protobuf>=3.12.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (3.17.3)\n", 305 | "Requirement already satisfied: requests<3.0.0dev,>=2.18.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (2.23.0)\n", 306 | "Requirement already satisfied: packaging>=14.3 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (21.0)\n", 307 | "Requirement already satisfied: pytz in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (2018.9)\n", 308 | "Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.6.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (1.53.0)\n", 309 | "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=14.3->google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (2.4.7)\n", 310 | "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.7/dist-packages (from pyasn1-modules>=0.2.1->google-auth>=1.2.0->google-cloud-storage) (0.4.8)\n", 311 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (2021.5.30)\n", 312 | "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (3.0.4)\n", 313 | "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (2.10)\n", 314 | "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from 
requests<3.0.0dev,>=2.18.0->google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (1.24.3)\n" 315 | ] 316 | } 317 | ], 318 | "source": [ 319 | "!pip install google-cloud-storage" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 40, 325 | "metadata": { 326 | "id": "OwldC4ntpD7Z" 327 | }, 328 | "outputs": [], 329 | "source": [ 330 | "_cloud_function_file = \"cloud_function/main.py\"" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 57, 336 | "metadata": { 337 | "colab": { 338 | "base_uri": "https://localhost:8080/" 339 | }, 340 | "id": "59_-cyfIonqP", 341 | "outputId": "1ef71f8a-dc15-48ed-dbcb-97eff5cd7531" 342 | }, 343 | "outputs": [ 344 | { 345 | "name": "stdout", 346 | "output_type": "stream", 347 | "text": [ 348 | "Overwriting cloud_function/main.py\n" 349 | ] 350 | } 351 | ], 352 | "source": [ 353 | "%%writefile {_cloud_function_file}\n", 354 | "\n", 355 | "import os\n", 356 | "import re\n", 357 | "import json\n", 358 | "import logging\n", 359 | "import base64\n", 360 | "\n", 361 | "from datetime import datetime\n", 362 | "\n", 363 | "from kfp.v2.google.client import AIPlatformClient\n", 364 | "from google.cloud import storage\n", 365 | "\n", 366 | "\n", 367 | "def get_number_of_images(storage_client, bucket, latest_directory):\n", 368 | " blobs = storage_client.list_blobs(bucket, prefix=latest_directory)\n", 369 | "\n", 370 | " count = 0\n", 371 | " for blob in blobs:\n", 372 | " if blob.name.split(\".\")[-1] == \"jpg\":\n", 373 | " count = count + 1\n", 374 | "\n", 375 | " return count\n", 376 | "\n", 377 | "\n", 378 | "def is_there_enough_images(storage_client, bucket, latest_directory, threshold):\n", 379 | " number_of_images = get_number_of_images(storage_client, bucket, latest_directory)\n", 380 | " print(f\"number of images = {number_of_images}\")\n", 381 | " return number_of_images >= threshold\n", 382 | "\n", 383 | "\n", 384 | "def get_latest_directory(storage_client, bucket):\n", 385 | " blobs = storage_client.list_blobs(bucket)\n", 386 | "\n", 387 | " folders = list(\n", 388 | " set(\n", 389 | " [\n", 390 | " os.path.dirname(blob.name)\n", 391 | " for blob in blobs\n", 392 | " if bool(\n", 393 | " re.match(\n", 394 | " \"[1-9][0-9][0-9][0-9]-[0-1][0-9]\", os.path.dirname(blob.name)\n", 395 | " )\n", 396 | " )\n", 397 | " is True\n", 398 | " ]\n", 399 | " )\n", 400 | " )\n", 401 | "\n", 402 | " folders.sort(key=lambda date: datetime.strptime(date, \"%Y-%m\"))\n", 403 | " print(folders[0])\n", 404 | " return folders[0]\n", 405 | "\n", 406 | "\n", 407 | "def trigger_pipeline(event, context):\n", 408 | " # Parse the environment variables.\n", 409 | " project = os.getenv(\"PROJECT\")\n", 410 | " region = os.getenv(\"REGION\")\n", 411 | " gcs_pipeline_file_location = os.getenv(\"GCS_PIPELINE_FILE_LOCATION\")\n", 412 | " gcs_image_bucket = os.getenv(\"GCS_IMAGE_BUCKET\")\n", 413 | "\n", 414 | " print(project)\n", 415 | " print(region)\n", 416 | " print(gcs_pipeline_file_location)\n", 417 | " print(gcs_image_bucket)\n", 418 | "\n", 419 | " threshold = 100\n", 420 | "\n", 421 | " # Check if the pipeline file exists in the provided GCS Bucket.\n", 422 | " storage_client = storage.Client()\n", 423 | " latest_directory = get_latest_directory(storage_client, gcs_image_bucket)\n", 424 | "\n", 425 | " if is_there_enough_images(\n", 426 | " storage_client, gcs_image_bucket, latest_directory, threshold\n", 427 | " ):\n", 428 | " path_parts = gcs_pipeline_file_location.replace(\"gs://\", 
\"\").split(\"/\")\n", 429 | " pipeline_bucket = path_parts[0]\n", 430 | " pipeline_blob = \"/\".join(path_parts[1:])\n", 431 | "\n", 432 | " pipeline_bucket = storage_client.bucket(pipeline_bucket)\n", 433 | " blob = storage.Blob(bucket=pipeline_bucket, name=pipeline_blob)\n", 434 | "\n", 435 | " if not blob.exists(storage_client):\n", 436 | " raise ValueError(f\"{gcs_pipeline_file_location} does not exist.\")\n", 437 | "\n", 438 | " # Initialize Vertex AI API client and submit for pipeline execution.\n", 439 | " api_client = AIPlatformClient(project_id=project, region=region)\n", 440 | "\n", 441 | " response = api_client.create_run_from_job_spec(\n", 442 | " job_spec_path=gcs_pipeline_file_location,\n", 443 | " parameter_values={\"data_gcs_prefix\": latest_directory},\n", 444 | " enable_caching=True,\n", 445 | " )\n", 446 | "\n", 447 | " logging.info(response)" 448 | ] 449 | }, 450 | { 451 | "cell_type": "markdown", 452 | "metadata": { 453 | "id": "kUjuI5z7rH0Z" 454 | }, 455 | "source": [ 456 | "### Deploy Cloud Function" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": 58, 462 | "metadata": { 463 | "id": "8zjMovG1WyIV" 464 | }, 465 | "outputs": [], 466 | "source": [ 467 | "!cd cloud_function" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": 59, 473 | "metadata": { 474 | "colab": { 475 | "base_uri": "https://localhost:8080/" 476 | }, 477 | "id": "lAcMJL-9qpoP", 478 | "outputId": "bf56830a-e1c4-460d-c50a-5be902669698" 479 | }, 480 | "outputs": [ 481 | { 482 | "name": "stdout", 483 | "output_type": "stream", 484 | "text": [ 485 | "PROJECT=gcp-ml-172005,REGION=us-central1,GCS_PIPELINE_FILE_LOCATION=gs://cifar10-experimental-csp2/pipeline_root/continuous-adaptation-for-data-changes-batch/continuous-adaptation-for-data-changes-batch_pipeline.json,GCS_IMAGE_BUCKET=batch-prediction-collection-3\n" 486 | ] 487 | } 488 | ], 489 | "source": [ 490 | "ENV_VARS=f\"\"\"\\\n", 491 | "PROJECT={GOOGLE_CLOUD_PROJECT},\\\n", 492 | "REGION={GOOGLE_CLOUD_REGION},\\\n", 493 | "GCS_PIPELINE_FILE_LOCATION={PIPELINE_LOCATION},\\\n", 494 | "GCS_IMAGE_BUCKET={IMAGE_LOCATION_BUCKET}\n", 495 | "\"\"\"\n", 496 | "\n", 497 | "!echo {ENV_VARS}" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": 60, 503 | "metadata": { 504 | "colab": { 505 | "base_uri": "https://localhost:8080/" 506 | }, 507 | "id": "bdw7HUcSpBU7", 508 | "outputId": "ca11ac12-819d-4d3d-9c8a-5a1f9bc1a1b6" 509 | }, 510 | "outputs": [ 511 | { 512 | "name": "stdout", 513 | "output_type": "stream", 514 | "text": [ 515 | "\n", 516 | "For Cloud Build Logs, visit: https://console.cloud.google.com/cloud-build/builds;region=us-central1/b6851c4a-a1a5-47c2-8cb1-a22f927867ad?project=874401645461\n", 517 | "availableMemoryMb: 256\n", 518 | "buildId: 5dea4cb3-7602-4c7e-992a-2ddcaa5566bd\n", 519 | "buildName: projects/874401645461/locations/us-central1/builds/5dea4cb3-7602-4c7e-992a-2ddcaa5566bd\n", 520 | "entryPoint: trigger_pipeline\n", 521 | "environmentVariables:\n", 522 | " GCS_IMAGE_BUCKET: batch-prediction-collection-3\n", 523 | " GCS_IMAGE_FILE_LOCATION: gs://batch-prediction-collection-3\n", 524 | " GCS_PIPELINE_FILE_LOCATION: gs://cifar10-experimental-csp2/pipeline_root/continuous-adaptation-for-data-changes-batch/continuous-adaptation-for-data-changes-batch_pipeline.json\n", 525 | " PROJECT: gcp-ml-172005\n", 526 | " REGION: us-central1\n", 527 | "eventTrigger:\n", 528 | " eventType: google.pubsub.topic.publish\n", 529 | " failurePolicy: {}\n", 530 | " resource: 
projects/gcp-ml-172005/topics/trigger-continuous-adaptation-for-data-changes-batch\n", 531 | " service: pubsub.googleapis.com\n", 532 | "ingressSettings: ALLOW_ALL\n", 533 | "labels:\n", 534 | " deployment-tool: cli-gcloud\n", 535 | "name: projects/gcp-ml-172005/locations/us-central1/functions/trigger-continuous-adaptation-for-data-changes-batch-fn\n", 536 | "runtime: python37\n", 537 | "serviceAccountEmail: gcp-ml-172005@appspot.gserviceaccount.com\n", 538 | "sourceArchiveUrl: gs://cifar10-experimental-csp2/us-central1-projects/gcp-ml-172005/locations/us-central1/functions/trigger-continuous-adaptation-for-data-changes-batch-fn-rkbjepcrnujy.zip\n", 539 | "status: ACTIVE\n", 540 | "timeout: 60s\n", 541 | "updateTime: '2021-10-19T00:48:41.632Z'\n", 542 | "versionId: '14'\n" 543 | ] 544 | } 545 | ], 546 | "source": [ 547 | "BUCKET = f'gs://{GCS_BUCKET_NAME}'\n", 548 | "CLOUD_FUNCTION_NAME = f'trigger-{PIPELINE_NAME}-fn'\n", 549 | "\n", 550 | "!gcloud functions deploy {CLOUD_FUNCTION_NAME} \\\n", 551 | " --region={GOOGLE_CLOUD_REGION} \\\n", 552 | " --trigger-topic={PUBSUB_TOPIC} \\\n", 553 | " --runtime=python37 \\\n", 554 | " --source=cloud_function\\\n", 555 | " --entry-point=trigger_pipeline\\\n", 556 | " --stage-bucket={BUCKET}\\\n", 557 | " --update-env-vars={ENV_VARS}" 558 | ] 559 | }, 560 | { 561 | "cell_type": "markdown", 562 | "metadata": { 563 | "id": "NEZIpLjNrNe6" 564 | }, 565 | "source": [ 566 | "### See the Progress" 567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": null, 572 | "metadata": { 573 | "colab": { 574 | "base_uri": "https://localhost:8080/", 575 | "height": 34 576 | }, 577 | "id": "WOudc6YvrPZA", 578 | "outputId": "9c70e204-86c8-44f7-db60-60cf1985aa7b" 579 | }, 580 | "outputs": [ 581 | { 582 | "data": { 583 | "text/html": [ 584 | "See the Cloud Function details here." 
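One thing to watch in the `cloud_function/main.py` module deployed above: `get_latest_directory` sorts the `YYYY-MM` folders in ascending date order and then returns `folders[0]`, which is the *oldest* month. If the intent is the most recent month (an assumption, but the function name suggests it), the helper should return the last element instead; a corrected sketch:

```python
import os
import re
from datetime import datetime


def get_latest_directory(storage_client, bucket):
    """Corrected sketch of the helper from cloud_function/main.py above."""
    blobs = storage_client.list_blobs(bucket)

    # Keep only directories that look like "YYYY-MM".
    folders = list(
        {
            os.path.dirname(blob.name)
            for blob in blobs
            if re.match(r"[1-9][0-9][0-9][0-9]-[0-1][0-9]", os.path.dirname(blob.name))
        }
    )

    # Ascending sort puts the oldest month first, so take the last element
    # rather than folders[0] as the original does.
    folders.sort(key=lambda date: datetime.strptime(date, "%Y-%m"))
    return folders[-1]
```

Note also that the Cloud Scheduler job created below uses the cron string `*/3 * * * *`, which fires every three minutes; the inline `#every hour` comment would correspond to `0 * * * *`.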
585 | ], 586 | "text/plain": [ 587 | "" 588 | ] 589 | }, 590 | "metadata": {}, 591 | "output_type": "display_data" 592 | } 593 | ], 594 | "source": [ 595 | "import IPython\n", 596 | "\n", 597 | "cloud_fn_url = f\"https://console.cloud.google.com/functions/details/{GOOGLE_CLOUD_REGION}/{CLOUD_FUNCTION_NAME}\"\n", 598 | "html = (\n", 599 | " f'See the Cloud Function details here.'\n", 600 | ")\n", 601 | "IPython.display.display(IPython.display.HTML(html))" 602 | ] 603 | }, 604 | { 605 | "cell_type": "markdown", 606 | "metadata": { 607 | "id": "iknpM94_tnOc" 608 | }, 609 | "source": [ 610 | "# Create Cloud Scheduler's Job" 611 | ] 612 | }, 613 | { 614 | "cell_type": "code", 615 | "execution_count": null, 616 | "metadata": { 617 | "colab": { 618 | "base_uri": "https://localhost:8080/" 619 | }, 620 | "id": "7dx03Q6Qt0n4", 621 | "outputId": "d8a6f937-a4fc-4a60-859c-6fef96ae4df2" 622 | }, 623 | "outputs": [ 624 | { 625 | "name": "stdout", 626 | "output_type": "stream", 627 | "text": [ 628 | "name: projects/gcp-ml-172005/locations/us-central1/jobs/scheduler-job-trigger-continuous-adaptation-for-data-changes-batch\n", 629 | "pubsubTarget:\n", 630 | " attributes:\n", 631 | " name: scheduler\n", 632 | " topicName: projects/gcp-ml-172005/topics/trigger-continuous-adaptation-for-data-changes-batch\n", 633 | "retryConfig:\n", 634 | " maxBackoffDuration: 3600s\n", 635 | " maxDoublings: 16\n", 636 | " maxRetryDuration: 0s\n", 637 | " minBackoffDuration: 5s\n", 638 | "schedule: '*/3 * * * *'\n", 639 | "state: ENABLED\n", 640 | "timeZone: Etc/UTC\n", 641 | "userUpdateTime: '2021-10-18T01:10:04Z'\n" 642 | ] 643 | } 644 | ], 645 | "source": [ 646 | "!gcloud scheduler jobs create pubsub $SCHEDULER_JOB_NAME --schedule \"*/3 * * * *\" --topic $PUBSUB_TOPIC --attributes name=scheduler #every hour" 647 | ] 648 | } 649 | ], 650 | "metadata": { 651 | "colab": { 652 | "include_colab_link": true, 653 | "name": "04_Cloud_Scheduler_Trigger.ipynb", 654 | "provenance": [] 655 | }, 656 | "kernelspec": { 657 | "display_name": "Python 3 (ipykernel)", 658 | "language": "python", 659 | "name": "python3" 660 | }, 661 | "language_info": { 662 | "codemirror_mode": { 663 | "name": "ipython", 664 | "version": 3 665 | }, 666 | "file_extension": ".py", 667 | "mimetype": "text/x-python", 668 | "name": "python", 669 | "nbconvert_exporter": "python", 670 | "pygments_lexer": "ipython3", 671 | "version": "3.8.2" 672 | } 673 | }, 674 | "nbformat": 4, 675 | "nbformat_minor": 1 676 | } 677 | -------------------------------------------------------------------------------- /notebooks/02_TFX_Training_Pipeline.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "view-in-github" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "id": "mTVp-9PGYFIO" 17 | }, 18 | "source": [ 19 | "This notebook assumes you are familiar with the basics of Vertex AI, TFX (especially custom components), and TensorFlow. 
" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": { 25 | "id": "W7gJqmqrsfqh" 26 | }, 27 | "source": [ 28 | "## References\n", 29 | "\n", 30 | "This notebook refers to the following resources and also reuses parts of the code from there: \n", 31 | "* [Simple TFX Pipeline for Vertex Pipelines](https://colab.research.google.com/github/tensorflow/tfx/blob/master/docs/tutorials/tfx/gcp/vertex_pipelines_simple.ipynb)\n", 32 | "* [Vertex AI Training with TFX and Vertex Pipelines](https://www.tensorflow.org/tfx/tutorials/tfx/gcp/vertex_pipelines_vertex_training)\n", 33 | "* [Importing models to Vertex AI](https://cloud.google.com/vertex-ai/docs/general/import-model)\n", 34 | "* [Deploying a model using the Vertex AI API](https://cloud.google.com/vertex-ai/docs/predictions/deploy-model-api)\n", 35 | "* [MLOPs with Vertex AI](https://github.com/GoogleCloudPlatform/mlops-with-vertex-ai)\n", 36 | "* [Custom components TFX](https://www.tensorflow.org/tfx/tutorials/tfx/python_function_component)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": { 42 | "id": "O9aBRdubPFsU" 43 | }, 44 | "source": [ 45 | "## Prerequisites\n", 46 | "- Enable Vertex AI API\n", 47 | "- Add the following rules to IAM\n", 48 | " - Vertex AI Custom Code Service Agent\n", 49 | " - Vertex AI Service Agent\n", 50 | " - Vertex AI User\n", 51 | " - Artifact Registry Service Agent\n", 52 | " - Container Registry Service Agent" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": { 58 | "id": "D04aKMGWXjOu" 59 | }, 60 | "source": [ 61 | "## Setup" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 1, 67 | "metadata": { 68 | "id": "I_niUhp_TY1G" 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "# Use the latest version of pip.\n", 73 | "%%capture\n", 74 | "!pip install --upgrade tfx==1.2.0 kfp==1.6.1\n", 75 | "!pip install -q --upgrade google-cloud-aiplatform" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": { 81 | "id": "ZVmgQ6w1oT_Z" 82 | }, 83 | "source": [ 84 | "### ***Please restart runtime before continuing.*** " 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "id": "mstgsNHWoiXk" 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "!gcloud init" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 2, 101 | "metadata": { 102 | "id": "Pl8ewjX3oXRx" 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "from google.colab import auth\n", 107 | "auth.authenticate_user()" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": { 113 | "id": "zqVWpmywXngD" 114 | }, 115 | "source": [ 116 | "## Imports" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 3, 122 | "metadata": { 123 | "colab": { 124 | "base_uri": "https://localhost:8080/" 125 | }, 126 | "id": "wptXF0e-UXsT", 127 | "outputId": "3228fd0e-aac7-454d-dcfa-ddcf3f74ee12" 128 | }, 129 | "outputs": [ 130 | { 131 | "name": "stdout", 132 | "output_type": "stream", 133 | "text": [ 134 | "TensorFlow version: 2.5.1\n", 135 | "TFX version: 1.2.0\n", 136 | "KFP version: 1.6.1\n" 137 | ] 138 | } 139 | ], 140 | "source": [ 141 | "import tensorflow as tf\n", 142 | "\n", 143 | "print(\"TensorFlow version: {}\".format(tf.__version__))\n", 144 | "from tfx import v1 as tfx\n", 145 | "\n", 146 | "print(\"TFX version: {}\".format(tfx.__version__))\n", 147 | "import kfp\n", 148 | "\n", 149 | "print(\"KFP version: {}\".format(kfp.__version__))\n", 150 | "\n", 151 | "from google.cloud 
import aiplatform as vertex_ai\n", 152 | "import os" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": { 158 | "id": "hFYHeepnXxpZ" 159 | }, 160 | "source": [ 161 | "## Environment setup" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 4, 167 | "metadata": { 168 | "id": "zPVyBrXrW-vu" 169 | }, 170 | "outputs": [], 171 | "source": [ 172 | "GOOGLE_CLOUD_PROJECT = \"gcp-ml-172005\" # @param {type:\"string\"}\n", 173 | "GOOGLE_CLOUD_REGION = \"us-central1\" # @param {type:\"string\"}\n", 174 | "GCS_BUCKET_NAME = \"cifar10-experimental-csp2\" # @param {type:\"string\"}\n", 175 | "DATA_ROOT = \"gs://cifar10-csp-public2\" # @param {type:\"string\"}\n", 176 | "\n", 177 | "if not (GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_REGION and GCS_BUCKET_NAME):\n", 178 | " from absl import logging\n", 179 | "\n", 180 | " logging.error(\"Please set all required parameters.\")" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": { 186 | "id": "CV-BZSvQq7YY" 187 | }, 188 | "source": [ 189 | "The location of the bucket must be a single region. Also, the bucket needs to be created in a region when [Vertex AI services are available](https://cloud.google.com/vertex-ai/docs/general/locations#available_regions). " 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 5, 195 | "metadata": { 196 | "colab": { 197 | "base_uri": "https://localhost:8080/" 198 | }, 199 | "id": "J65KHrt4X-Fu", 200 | "outputId": "ab76e6a9-dc20-41df-e42b-bc516ee67e7f" 201 | }, 202 | "outputs": [ 203 | { 204 | "name": "stdout", 205 | "output_type": "stream", 206 | "text": [ 207 | "PIPELINE_ROOT: gs://cifar10-experimental-csp2/pipeline_root/continuous-adaptation-for-data-changes\n" 208 | ] 209 | } 210 | ], 211 | "source": [ 212 | "PIPELINE_NAME = \"continuous-adaptation-for-data-changes\"\n", 213 | "\n", 214 | "# Path to various pipeline artifact.\n", 215 | "PIPELINE_ROOT = \"gs://{}/pipeline_root/{}\".format(GCS_BUCKET_NAME, PIPELINE_NAME)\n", 216 | "\n", 217 | "# Paths for users' Python module.\n", 218 | "MODULE_ROOT = \"gs://{}/pipeline_module/{}\".format(GCS_BUCKET_NAME, PIPELINE_NAME)\n", 219 | "\n", 220 | "# This is the path where your model will be pushed for serving.\n", 221 | "SERVING_MODEL_DIR = \"gs://{}/serving_model/{}\".format(GCS_BUCKET_NAME, PIPELINE_NAME)\n", 222 | "\n", 223 | "print(\"PIPELINE_ROOT: {}\".format(PIPELINE_ROOT))" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": { 229 | "id": "kQVpzyftX0y0" 230 | }, 231 | "source": [ 232 | "## Create training modules" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 6, 238 | "metadata": { 239 | "id": "zFgnx3uGAfuj" 240 | }, 241 | "outputs": [], 242 | "source": [ 243 | "_trainer_module_file = 'trainer.py'" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 7, 249 | "metadata": { 250 | "colab": { 251 | "base_uri": "https://localhost:8080/" 252 | }, 253 | "id": "sZqzotkfAf-C", 254 | "outputId": "ec319d61-dbbb-4095-f2d2-0d2704d872ba" 255 | }, 256 | "outputs": [ 257 | { 258 | "name": "stdout", 259 | "output_type": "stream", 260 | "text": [ 261 | "Writing trainer.py\n" 262 | ] 263 | } 264 | ], 265 | "source": [ 266 | "%%writefile {_trainer_module_file}\n", 267 | "\n", 268 | "from typing import List\n", 269 | "from absl import logging\n", 270 | "from tensorflow import keras\n", 271 | "from tfx import v1 as tfx\n", 272 | "import tensorflow as tf\n", 273 | "\n", 274 | "_IMAGE_FEATURES = {\n", 275 | " \"image\": 
tf.io.FixedLenFeature([], tf.string),\n", 276 | " \"label\": tf.io.FixedLenFeature([], tf.int64),\n", 277 | "}\n", 278 | "\n", 279 | "_CONCRETE_INPUT = \"numpy_inputs\"\n", 280 | "_TRAIN_BATCH_SIZE = 64\n", 281 | "_EVAL_BATCH_SIZE = 64\n", 282 | "_INPUT_SHAPE = (32, 32, 3)\n", 283 | "_EPOCHS = 2\n", 284 | "\n", 285 | "\n", 286 | "def _parse_fn(example):\n", 287 | " example = tf.io.parse_single_example(example, _IMAGE_FEATURES)\n", 288 | " image = tf.image.decode_jpeg(example[\"image\"], channels=3)\n", 289 | " class_label = tf.cast(example[\"label\"], tf.int32)\n", 290 | " return image, class_label\n", 291 | "\n", 292 | "\n", 293 | "def _input_fn(file_pattern: List[str], batch_size: int) -> tf.data.Dataset:\n", 294 | " print(f\"Reading data from: {file_pattern}\")\n", 295 | " tfrecord_filenames = tf.io.gfile.glob(file_pattern[0] + \".gz\")\n", 296 | " print(tfrecord_filenames)\n", 297 | " dataset = tf.data.TFRecordDataset(tfrecord_filenames, compression_type=\"GZIP\")\n", 298 | " dataset = dataset.map(_parse_fn).batch(batch_size)\n", 299 | " return dataset.repeat()\n", 300 | "\n", 301 | "\n", 302 | "def _make_keras_model() -> tf.keras.Model:\n", 303 | " \"\"\"Creates a ResNet50-based model for classifying flowers data.\n", 304 | "\n", 305 | " Returns:\n", 306 | " A Keras Model.\n", 307 | " \"\"\"\n", 308 | " inputs = keras.Input(shape=_INPUT_SHAPE)\n", 309 | " base_model = keras.applications.ResNet50(\n", 310 | " include_top=False, input_shape=_INPUT_SHAPE, pooling=\"avg\"\n", 311 | " )\n", 312 | " base_model.trainable = False\n", 313 | " x = tf.keras.applications.resnet.preprocess_input(inputs)\n", 314 | " x = base_model(\n", 315 | " x, training=False\n", 316 | " ) # Ensures BatchNorm runs in inference model in this model\n", 317 | " outputs = keras.layers.Dense(10, activation=\"softmax\")(x)\n", 318 | " model = keras.Model(inputs, outputs)\n", 319 | "\n", 320 | " model.compile(\n", 321 | " optimizer=keras.optimizers.Adam(),\n", 322 | " loss=tf.keras.losses.SparseCategoricalCrossentropy(),\n", 323 | " metrics=[keras.metrics.SparseCategoricalAccuracy()],\n", 324 | " )\n", 325 | "\n", 326 | " model.summary(print_fn=logging.info)\n", 327 | " return model\n", 328 | "\n", 329 | "\n", 330 | "def _preprocess(bytes_input):\n", 331 | " decoded = tf.io.decode_jpeg(bytes_input, channels=3)\n", 332 | " resized = tf.image.resize(decoded, size=(32, 32))\n", 333 | " return resized\n", 334 | "\n", 335 | "\n", 336 | "@tf.function(input_signature=[tf.TensorSpec([None], tf.string)])\n", 337 | "def preprocess_fn(bytes_inputs):\n", 338 | " decoded_images = tf.map_fn(\n", 339 | " _preprocess, bytes_inputs, dtype=tf.float32, back_prop=False\n", 340 | " )\n", 341 | " return {_CONCRETE_INPUT: decoded_images}\n", 342 | "\n", 343 | "\n", 344 | "def _model_exporter(model: tf.keras.Model):\n", 345 | " m_call = tf.function(model.call).get_concrete_function(\n", 346 | " [tf.TensorSpec(shape=[None, 32, 32, 3], dtype=tf.float32, name=_CONCRETE_INPUT)]\n", 347 | " )\n", 348 | "\n", 349 | " @tf.function(input_signature=[tf.TensorSpec([None], tf.string)])\n", 350 | " def serving_fn(bytes_inputs):\n", 351 | " # This function comes from the Computer Vision book from O'Reilly.\n", 352 | " labels = tf.constant(\n", 353 | " [\n", 354 | " \"airplane\",\n", 355 | " \"automobile\",\n", 356 | " \"bird\",\n", 357 | " \"cat\",\n", 358 | " \"deer\",\n", 359 | " \"dog\",\n", 360 | " \"frog\",\n", 361 | " \"horse\",\n", 362 | " \"ship\",\n", 363 | " \"truck\",\n", 364 | " ],\n", 365 | " dtype=tf.string,\n", 366 | " )\n", 367 | " images = 
preprocess_fn(bytes_inputs)\n", 368 | "\n", 369 | " probs = m_call(**images)\n", 370 | " indices = tf.argmax(probs, axis=1)\n", 371 | " pred_source = tf.gather(params=labels, indices=indices)\n", 372 | " pred_confidence = tf.reduce_max(probs, axis=1)\n", 373 | " return {\"label\": pred_source, \"confidence\": pred_confidence}\n", 374 | "\n", 375 | " return serving_fn\n", 376 | "\n", 377 | "\n", 378 | "def run_fn(fn_args: tfx.components.FnArgs):\n", 379 | " print(fn_args)\n", 380 | "\n", 381 | " train_dataset = _input_fn(fn_args.train_files, batch_size=_TRAIN_BATCH_SIZE)\n", 382 | " eval_dataset = _input_fn(fn_args.eval_files, batch_size=_EVAL_BATCH_SIZE)\n", 383 | "\n", 384 | " model = _make_keras_model()\n", 385 | " model.fit(\n", 386 | " train_dataset,\n", 387 | " steps_per_epoch=fn_args.train_steps,\n", 388 | " validation_data=eval_dataset,\n", 389 | " validation_steps=fn_args.eval_steps,\n", 390 | " epochs=_EPOCHS,\n", 391 | " )\n", 392 | "\n", 393 | " _, acc = model.evaluate(eval_dataset, steps=fn_args.eval_steps)\n", 394 | " logging.info(f\"Validation accuracy: {round(acc * 100, 2)}%\")\n", 395 | " # The result of the training should be saved in `fn_args.serving_model_dir`\n", 396 | " # directory.\n", 397 | " tf.saved_model.save(\n", 398 | " model,\n", 399 | " fn_args.serving_model_dir,\n", 400 | " signatures={\"serving_default\": _model_exporter(model)},\n", 401 | " )" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 8, 407 | "metadata": { 408 | "colab": { 409 | "base_uri": "https://localhost:8080/" 410 | }, 411 | "id": "DEPD_70MLf9b", 412 | "outputId": "36120020-9a1b-4e57-d39c-7d3b93697a2c" 413 | }, 414 | "outputs": [ 415 | { 416 | "name": "stdout", 417 | "output_type": "stream", 418 | "text": [ 419 | "Copying file://trainer.py [Content-Type=text/x-python]...\n", 420 | "/ [1 files][ 3.8 KiB/ 3.8 KiB] \n", 421 | "Operation completed over 1 objects/3.8 KiB. 
\n", 422 | " 3.8 KiB 2021-10-16T14:22:00Z gs://cifar10-experimental-csp2/pipeline_module/continuous-adaptation-for-data-changes/trainer.py\n", 423 | "TOTAL: 1 objects, 3890 bytes (3.8 KiB)\n" 424 | ] 425 | } 426 | ], 427 | "source": [ 428 | "!gsutil cp {_trainer_module_file} {MODULE_ROOT}/\n", 429 | "!gsutil ls -lh {MODULE_ROOT}/" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 9, 435 | "metadata": { 436 | "colab": { 437 | "base_uri": "https://localhost:8080/", 438 | "height": 35 439 | }, 440 | "id": "uKK1LHdaNIJc", 441 | "outputId": "74130cba-63c5-474f-c915-6ab3c355a3f3" 442 | }, 443 | "outputs": [ 444 | { 445 | "data": { 446 | "application/vnd.google.colaboratory.intrinsic+json": { 447 | "type": "string" 448 | }, 449 | "text/plain": [ 450 | "'gs://cifar10-experimental-csp2/pipeline_module/continuous-adaptation-for-data-changes/trainer.py'" 451 | ] 452 | }, 453 | "execution_count": 9, 454 | "metadata": {}, 455 | "output_type": "execute_result" 456 | } 457 | ], 458 | "source": [ 459 | "os.path.join(MODULE_ROOT, _trainer_module_file)" 460 | ] 461 | }, 462 | { 463 | "cell_type": "markdown", 464 | "metadata": { 465 | "id": "-WTt_mw3cnia" 466 | }, 467 | "source": [ 468 | "## Custom Vertex Components \n", 469 | "- basically cloned from [Dual Deployment Project]()" 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": 10, 475 | "metadata": { 476 | "id": "T7XUOglgctyb" 477 | }, 478 | "outputs": [], 479 | "source": [ 480 | "_vertex_uploader_module_file = \"vertex_uploader.py\"\n", 481 | "_vertex_deployer_module_file = \"vertex_deployer.py\"" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": 11, 487 | "metadata": { 488 | "colab": { 489 | "base_uri": "https://localhost:8080/" 490 | }, 491 | "id": "qnOtYLm6cviP", 492 | "outputId": "5334b065-a124-47ab-91d3-bbff4f470396" 493 | }, 494 | "outputs": [ 495 | { 496 | "name": "stdout", 497 | "output_type": "stream", 498 | "text": [ 499 | "Writing vertex_uploader.py\n" 500 | ] 501 | } 502 | ], 503 | "source": [ 504 | "%%writefile {_vertex_uploader_module_file}\n", 505 | "\n", 506 | "import os\n", 507 | "import tensorflow as tf\n", 508 | "\n", 509 | "from tfx.dsl.component.experimental.decorators import component\n", 510 | "from tfx.dsl.component.experimental.annotations import Parameter\n", 511 | "from tfx.types.standard_artifacts import String\n", 512 | "from google.cloud import aiplatform as vertex_ai\n", 513 | "from tfx import v1 as tfx\n", 514 | "from absl import logging\n", 515 | "\n", 516 | "\n", 517 | "@component\n", 518 | "def VertexUploader(\n", 519 | " project: Parameter[str],\n", 520 | " region: Parameter[str],\n", 521 | " model_display_name: Parameter[str],\n", 522 | " pushed_model_location: Parameter[str],\n", 523 | " serving_image_uri: Parameter[str],\n", 524 | " uploaded_model: tfx.dsl.components.OutputArtifact[String],\n", 525 | "):\n", 526 | "\n", 527 | " vertex_ai.init(project=project, location=region)\n", 528 | "\n", 529 | " pushed_model_dir = os.path.join(\n", 530 | " pushed_model_location, tf.io.gfile.listdir(pushed_model_location)[-1]\n", 531 | " )\n", 532 | "\n", 533 | " logging.info(f\"Model registry location: {pushed_model_dir}\")\n", 534 | "\n", 535 | " vertex_model = vertex_ai.Model.upload(\n", 536 | " display_name=model_display_name,\n", 537 | " artifact_uri=pushed_model_dir,\n", 538 | " serving_container_image_uri=serving_image_uri,\n", 539 | " parameters_schema_uri=None,\n", 540 | " instance_schema_uri=None,\n", 541 | " explanation_metadata=None,\n", 
542 | " explanation_parameters=None,\n", 543 | " )\n", 544 | "\n", 545 | " uploaded_model.set_string_custom_property(\n", 546 | " \"model_resource_name\", str(vertex_model.resource_name)\n", 547 | " )\n", 548 | " logging.info(f\"Model resource: {str(vertex_model.resource_name)}\")" 549 | ] 550 | }, 551 | { 552 | "cell_type": "code", 553 | "execution_count": 12, 554 | "metadata": { 555 | "colab": { 556 | "base_uri": "https://localhost:8080/" 557 | }, 558 | "id": "5nUaRKSJczio", 559 | "outputId": "b62b81ae-cc2a-4551-d5a4-751b41bc89bc" 560 | }, 561 | "outputs": [ 562 | { 563 | "name": "stdout", 564 | "output_type": "stream", 565 | "text": [ 566 | "Writing vertex_deployer.py\n" 567 | ] 568 | } 569 | ], 570 | "source": [ 571 | "%%writefile {_vertex_deployer_module_file}\n", 572 | "\n", 573 | "from tfx.dsl.component.experimental.decorators import component\n", 574 | "from tfx.dsl.component.experimental.annotations import Parameter\n", 575 | "from tfx.types.standard_artifacts import String\n", 576 | "from google.cloud import aiplatform as vertex_ai\n", 577 | "from tfx import v1 as tfx\n", 578 | "from absl import logging\n", 579 | "\n", 580 | "\n", 581 | "@component\n", 582 | "def VertexDeployer(\n", 583 | " project: Parameter[str],\n", 584 | " region: Parameter[str],\n", 585 | " model_display_name: Parameter[str],\n", 586 | " deployed_model_display_name: Parameter[str],\n", 587 | "):\n", 588 | "\n", 589 | " logging.info(f\"Endpoint display: {deployed_model_display_name}\")\n", 590 | " vertex_ai.init(project=project, location=region)\n", 591 | "\n", 592 | " endpoints = vertex_ai.Endpoint.list(\n", 593 | " filter=f\"display_name={deployed_model_display_name}\", order_by=\"update_time\"\n", 594 | " )\n", 595 | "\n", 596 | " if len(endpoints) > 0:\n", 597 | " logging.info(f\"Endpoint {deployed_model_display_name} already exists.\")\n", 598 | " endpoint = endpoints[-1]\n", 599 | " else:\n", 600 | " endpoint = vertex_ai.Endpoint.create(deployed_model_display_name)\n", 601 | "\n", 602 | " model = vertex_ai.Model.list(\n", 603 | " filter=f\"display_name={model_display_name}\", order_by=\"update_time\"\n", 604 | " )[-1]\n", 605 | "\n", 606 | " endpoint = vertex_ai.Endpoint.list(\n", 607 | " filter=f\"display_name={deployed_model_display_name}\", order_by=\"update_time\"\n", 608 | " )[-1]\n", 609 | "\n", 610 | " deployed_model = endpoint.deploy(\n", 611 | " model=model,\n", 612 | " # Syntax from here: https://git.io/JBQDP\n", 613 | " traffic_split={\"0\": 100},\n", 614 | " machine_type=\"n1-standard-4\",\n", 615 | " min_replica_count=1,\n", 616 | " max_replica_count=1,\n", 617 | " )\n", 618 | "\n", 619 | " logging.info(f\"Model deployed to: {deployed_model}\")" 620 | ] 621 | }, 622 | { 623 | "cell_type": "code", 624 | "execution_count": 13, 625 | "metadata": { 626 | "id": "QyR80VnedA9Y" 627 | }, 628 | "outputs": [], 629 | "source": [ 630 | "!mkdir -p ./custom_components\n", 631 | "!touch ./custom_components/__init__.py\n", 632 | "!cp -r {_vertex_uploader_module_file} {_vertex_deployer_module_file} custom_components" 633 | ] 634 | }, 635 | { 636 | "cell_type": "code", 637 | "execution_count": 14, 638 | "metadata": { 639 | "colab": { 640 | "base_uri": "https://localhost:8080/" 641 | }, 642 | "id": "DLXV-aRodEmH", 643 | "outputId": "e23b1a28-a329-470e-9d1b-89ac2fab820b" 644 | }, 645 | "outputs": [ 646 | { 647 | "name": "stdout", 648 | "output_type": "stream", 649 | "text": [ 650 | "total 8.0K\n", 651 | "-rw-r--r-- 1 root root 0 Oct 16 14:22 __init__.py\n", 652 | "-rw-r--r-- 1 root root 1.5K Oct 16 14:22 
vertex_deployer.py\n", 653 | "-rw-r--r-- 1 root root 1.4K Oct 16 14:22 vertex_uploader.py\n" 654 | ] 655 | } 656 | ], 657 | "source": [ 658 | "!ls -lh custom_components" 659 | ] 660 | }, 661 | { 662 | "cell_type": "code", 663 | "execution_count": 15, 664 | "metadata": { 665 | "colab": { 666 | "base_uri": "https://localhost:8080/" 667 | }, 668 | "id": "5-un8Vj1dGoL", 669 | "outputId": "56471ff8-29a0-49b7-fdd3-137a071f6613" 670 | }, 671 | "outputs": [ 672 | { 673 | "name": "stdout", 674 | "output_type": "stream", 675 | "text": [ 676 | "URI of the custom image: gcr.io/gcp-ml-172005/cifar10:tfx-1-2-0\n" 677 | ] 678 | } 679 | ], 680 | "source": [ 681 | "DATASET_DISPLAY_NAME = \"cifar10\"\n", 682 | "VERSION = \"tfx-1-2-0\"\n", 683 | "TFX_IMAGE_URI = f\"gcr.io/{GOOGLE_CLOUD_PROJECT}/{DATASET_DISPLAY_NAME}:{VERSION}\"\n", 684 | "print(f\"URI of the custom image: {TFX_IMAGE_URI}\")" 685 | ] 686 | }, 687 | { 688 | "cell_type": "code", 689 | "execution_count": null, 690 | "metadata": { 691 | "colab": { 692 | "base_uri": "https://localhost:8080/" 693 | }, 694 | "id": "95lKF_6QdQ4o", 695 | "outputId": "5e4b8876-00f3-4045-c680-c9ed5f35dcd7" 696 | }, 697 | "outputs": [ 698 | { 699 | "name": "stdout", 700 | "output_type": "stream", 701 | "text": [ 702 | "Writing Dockerfile\n" 703 | ] 704 | } 705 | ], 706 | "source": [ 707 | "%%writefile Dockerfile\n", 708 | "\n", 709 | "FROM gcr.io/tfx-oss-public/tfx:1.2.0\n", 710 | "RUN mkdir -p custom_components\n", 711 | "COPY custom_components/* ./custom_components/\n", 712 | "RUN pip install --upgrade google-cloud-aiplatform" 713 | ] 714 | }, 715 | { 716 | "cell_type": "code", 717 | "execution_count": null, 718 | "metadata": { 719 | "id": "Tc_K3jVCdXE8" 720 | }, 721 | "outputs": [], 722 | "source": [ 723 | "!gcloud builds submit --tag $TFX_IMAGE_URI . --timeout=15m --machine-type=e2-highcpu-8" 724 | ] 725 | }, 726 | { 727 | "cell_type": "markdown", 728 | "metadata": { 729 | "id": "zGJU5sXrrAJW" 730 | }, 731 | "source": [ 732 | "# Pipeline" 733 | ] 734 | }, 735 | { 736 | "cell_type": "code", 737 | "execution_count": 16, 738 | "metadata": { 739 | "id": "sEbNM9CeERX2" 740 | }, 741 | "outputs": [], 742 | "source": [ 743 | "# Specify training worker configurations. 
To minimize costs we can even specify two\n", 744 | "# different configurations: a beefier machine for the Endpoint model and slightly less\n", 745 | "# powerful machine for the mobile model.\n", 746 | "TRAINING_JOB_SPEC = {\n", 747 | " \"project\": GOOGLE_CLOUD_PROJECT,\n", 748 | " \"worker_pool_specs\": [\n", 749 | " {\n", 750 | " \"machine_spec\": {\n", 751 | " \"machine_type\": \"n1-standard-4\",\n", 752 | " \"accelerator_type\": \"NVIDIA_TESLA_K80\",\n", 753 | " \"accelerator_count\": 1,\n", 754 | " },\n", 755 | " \"replica_count\": 1,\n", 756 | " \"container_spec\": {\n", 757 | " \"image_uri\": \"gcr.io/tfx-oss-public/tfx:{}\".format(tfx.__version__),\n", 758 | " },\n", 759 | " }\n", 760 | " ],\n", 761 | "}" 762 | ] 763 | }, 764 | { 765 | "cell_type": "code", 766 | "execution_count": 17, 767 | "metadata": { 768 | "id": "Ivc6LzpVuzKb" 769 | }, 770 | "outputs": [], 771 | "source": [ 772 | "SERVING_JOB_SPEC = {\n", 773 | " \"endpoint_name\": PIPELINE_NAME.replace(\"-\", \"_\"),  # '-' is not allowed.\n", 774 | " \"project_id\": GOOGLE_CLOUD_PROJECT,\n", 775 | " \"min_replica_count\": 1,\n", 776 | " \"max_replica_count\": 1,\n", 777 | " \"machine_type\": \"n1-standard-2\",\n", 778 | "}" 779 | ] 780 | }, 781 | { 782 | "cell_type": "code", 783 | "execution_count": 18, 784 | "metadata": { 785 | "id": "lOteqi0td5Vu" 786 | }, 787 | "outputs": [], 788 | "source": [ 789 | "from datetime import datetime\n", 790 | "\n", 791 | "TIMESTAMP = datetime.now().strftime(\"%Y%m%d%H%M%S\")" 792 | ] 793 | }, 794 | { 795 | "cell_type": "code", 796 | "execution_count": 19, 797 | "metadata": { 798 | "id": "EXtVu_w6Achq" 799 | }, 800 | "outputs": [], 801 | "source": [ 802 | "import tfx" 803 | ] 804 | }, 805 | { 806 | "cell_type": "markdown", 807 | "metadata": { 808 | "id": "qITdwKUKRZUg" 809 | }, 810 | "source": [ 811 | "The spanning feature is currently not working in TFX versions <= 1.3.0; it will be fixed in the next release. For now, as a workaround, this notebook uses the `utils.calculate_splits_fingerprint_span_and_version` function manually. Also note that `RuntimeParameter` can't be set within `utils.calculate_splits_fingerprint_span_and_version`, so it is not possible to select the range of spans dynamically at runtime.\n", 812 | "\n", 813 | "When the new release of TFX comes out, this part will be fixed."
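Concretely, the workaround resolves the span at compile time rather than at runtime. A minimal sketch of that idea (an illustration, not a cell from this notebook; it assumes the TFX 1.2 utility imported in the next cell and the `DATA_ROOT` bucket defined earlier):

```python
# A minimal sketch of the workaround: resolve the latest span under DATA_ROOT
# at compile time, since RuntimeParameter cannot drive the span selection.
from tfx.proto import example_gen_pb2
from tfx.components.example_gen import utils

splits = [
    example_gen_pb2.Input.Split(name="train", pattern="span-{SPAN}/train/*.tfrecord"),
    example_gen_pb2.Input.Split(name="val", pattern="span-{SPAN}/test/*.tfrecord"),
]

# Scans DATA_ROOT, picks the highest span/version matching the patterns, and
# substitutes the {SPAN} placeholders in `splits` with the resolved value.
fingerprint, span, version = utils.calculate_splits_fingerprint_span_and_version(
    DATA_ROOT, splits
)
print(f"Latest span resolved at compile time: {span}")
```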
814 | ] 815 | }, 816 | { 817 | "cell_type": "code", 818 | "execution_count": 42, 819 | "metadata": { 820 | "id": "ln1cvbcfphA9" 821 | }, 822 | "outputs": [], 823 | "source": [ 824 | "from tfx.orchestration import data_types\n", 825 | "\n", 826 | "from tfx import v1 as tfx\n", 827 | "from tfx.proto import example_gen_pb2, range_config_pb2\n", 828 | "from tfx.components.example_gen import utils\n", 829 | "\n", 830 | "from custom_components.vertex_uploader import VertexUploader\n", 831 | "from custom_components.vertex_deployer import VertexDeployer\n", 832 | "\n", 833 | "\n", 834 | "def _create_pipeline(\n", 835 | " input_config: data_types.RuntimeParameter,\n", 836 | " output_config: data_types.RuntimeParameter,\n", 837 | " pipeline_name: str,\n", 838 | " pipeline_root: str,\n", 839 | " data_root: str,\n", 840 | " serving_model_dir: str,\n", 841 | " trainer_module: str,\n", 842 | " project_id: str,\n", 843 | " region: str,\n", 844 | ") -> tfx.dsl.Pipeline:\n", 845 | " \"\"\"Creates a three component flowers pipeline with TFX.\"\"\"\n", 846 | " example_gen = tfx.components.ImportExampleGen(\n", 847 | " input_base=data_root, input_config=input_config, output_config=output_config\n", 848 | " )\n", 849 | "\n", 850 | " # Trainer\n", 851 | " trainer = tfx.extensions.google_cloud_ai_platform.Trainer(\n", 852 | " module_file=trainer_module,\n", 853 | " examples=example_gen.outputs[\"examples\"],\n", 854 | " train_args=tfx.proto.TrainArgs(splits=[\"train\"], num_steps=50000 // 64),\n", 855 | " eval_args=tfx.proto.EvalArgs(splits=[\"val\"], num_steps=10000 // 64),\n", 856 | " custom_config={\n", 857 | " tfx.extensions.google_cloud_ai_platform.ENABLE_VERTEX_KEY: True,\n", 858 | " tfx.extensions.google_cloud_ai_platform.VERTEX_REGION_KEY: region,\n", 859 | " tfx.extensions.google_cloud_ai_platform.TRAINING_ARGS_KEY: TRAINING_JOB_SPEC,\n", 860 | " \"use_gpu\": True,\n", 861 | " },\n", 862 | " ).with_id(\"trainer\")\n", 863 | "\n", 864 | " # Pushes the model to a filesystem destination.\n", 865 | " pushed_model_location = os.path.join(serving_model_dir, \"resnet50\")\n", 866 | " resnet_pusher = tfx.components.Pusher(\n", 867 | " model=trainer.outputs[\"model\"],\n", 868 | " push_destination=tfx.proto.PushDestination(\n", 869 | " filesystem=tfx.proto.PushDestination.Filesystem(\n", 870 | " base_directory=pushed_model_location\n", 871 | " )\n", 872 | " ),\n", 873 | " ).with_id(\"resnet_pusher\")\n", 874 | "\n", 875 | " # Vertex AI upload.\n", 876 | " model_display_name = \"resnet_cifar_latest\"\n", 877 | " uploader = VertexUploader(\n", 878 | " project=project_id,\n", 879 | " region=region,\n", 880 | " model_display_name=model_display_name,\n", 881 | " pushed_model_location=pushed_model_location,\n", 882 | " serving_image_uri=\"us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-5:latest\",\n", 883 | " ).with_id(\"vertex_uploader\")\n", 884 | " uploader.add_upstream_node(resnet_pusher)\n", 885 | "\n", 886 | " # Create an endpoint.\n", 887 | " deployer = VertexDeployer(\n", 888 | " project=project_id,\n", 889 | " region=region,\n", 890 | " model_display_name=model_display_name,\n", 891 | " deployed_model_display_name=model_display_name + \"_\" + TIMESTAMP,\n", 892 | " ).with_id(\"vertex_deployer\")\n", 893 | " deployer.add_upstream_node(uploader)\n", 894 | "\n", 895 | " components = [\n", 896 | " example_gen,\n", 897 | " trainer,\n", 898 | " resnet_pusher,\n", 899 | " uploader,\n", 900 | " deployer,\n", 901 | " ]\n", 902 | "\n", 903 | " return tfx.dsl.Pipeline(\n", 904 | " pipeline_name=pipeline_name,\n", 905 | 
" pipeline_root=pipeline_root,\n", 906 | " components=components,\n", 907 | " enable_cache=True,\n", 908 | " )" 909 | ] 910 | }, 911 | { 912 | "cell_type": "markdown", 913 | "metadata": { 914 | "id": "IFdlslfOX54z" 915 | }, 916 | "source": [ 917 | "## Compile the pipeline" 918 | ] 919 | }, 920 | { 921 | "cell_type": "code", 922 | "execution_count": 43, 923 | "metadata": { 924 | "id": "-AY5Z2tbsbwE" 925 | }, 926 | "outputs": [], 927 | "source": [ 928 | "import os\n", 929 | "\n", 930 | "PIPELINE_DEFINITION_FILE = PIPELINE_NAME + \"_pipeline.json\"\n", 931 | "\n", 932 | "# Important: We need to pass the custom Docker image URI to the\n", 933 | "# `KubeflowV2DagRunnerConfig` to take effect.\n", 934 | "runner = tfx.orchestration.experimental.KubeflowV2DagRunner(\n", 935 | " config=tfx.orchestration.experimental.KubeflowV2DagRunnerConfig(\n", 936 | " default_image=TFX_IMAGE_URI\n", 937 | " ),\n", 938 | " output_filename=PIPELINE_DEFINITION_FILE,\n", 939 | ")\n", 940 | "\n", 941 | "_ = runner.run(\n", 942 | " _create_pipeline(\n", 943 | " input_config=tfx.dsl.experimental.RuntimeParameter(\n", 944 | " name=\"input-config\",\n", 945 | " default='{\"input_config\": {\"splits\": [{\"name\":\"train\", \"pattern\":\"span-1/train/tfrecord\"}, {\"name\":\"val\", \"pattern\":\"span-1/test/tfrecord\"}]}}',\n", 946 | " ptype=str,\n", 947 | " ),\n", 948 | " output_config=tfx.dsl.experimental.RuntimeParameter(\n", 949 | " name=\"output-config\", default=\"{}\", ptype=str,\n", 950 | " ),\n", 951 | " pipeline_name=PIPELINE_NAME,\n", 952 | " pipeline_root=PIPELINE_ROOT,\n", 953 | " data_root=DATA_ROOT,\n", 954 | " serving_model_dir=SERVING_MODEL_DIR,\n", 955 | " trainer_module=os.path.join(MODULE_ROOT, _trainer_module_file),\n", 956 | " project_id=GOOGLE_CLOUD_PROJECT,\n", 957 | " region=GOOGLE_CLOUD_REGION,\n", 958 | " )\n", 959 | ")" 960 | ] 961 | }, 962 | { 963 | "cell_type": "code", 964 | "execution_count": 48, 965 | "metadata": { 966 | "colab": { 967 | "base_uri": "https://localhost:8080/" 968 | }, 969 | "id": "oCSQ98YN-F6v", 970 | "outputId": "d82ffbb8-7320-420d-9983-2f3b7476f075" 971 | }, 972 | "outputs": [ 973 | { 974 | "name": "stdout", 975 | "output_type": "stream", 976 | "text": [ 977 | "Copying file://continuous-adaptation-for-data-changes_pipeline.json [Content-Type=application/json]...\n", 978 | "/ [1 files][ 8.7 KiB/ 8.7 KiB] \n", 979 | "Operation completed over 1 objects/8.7 KiB. \n", 980 | " 8.69 KiB 2021-10-16T15:51:22Z gs://cifar10-experimental-csp2/pipeline_root/continuous-adaptation-for-data-changes/continuous-adaptation-for-data-changes_pipeline.json\n", 981 | " gs://cifar10-experimental-csp2/pipeline_root/continuous-adaptation-for-data-changes/874401645461/\n", 982 | "TOTAL: 1 objects, 8896 bytes (8.69 KiB)\n" 983 | ] 984 | } 985 | ], 986 | "source": [ 987 | "!gsutil cp {PIPELINE_DEFINITION_FILE} {PIPELINE_ROOT}/\n", 988 | "!gsutil ls -lh {PIPELINE_ROOT}/" 989 | ] 990 | }, 991 | { 992 | "cell_type": "markdown", 993 | "metadata": { 994 | "id": "ocHBJaR_X7x2" 995 | }, 996 | "source": [ 997 | "## Submit the pipeline for execution to Vertex AI\n", 998 | "\n", 999 | "Generally, it's a good idea to first do a local run of the end-to-end pipeline before submitting it an online orchestrator. We can use `tfx.orchestration.LocalDagRunner()` for that but for the purposes of this notebook we won't be doing that. 
" 1000 | ] 1001 | }, 1002 | { 1003 | "cell_type": "code", 1004 | "execution_count": 27, 1005 | "metadata": { 1006 | "colab": { 1007 | "base_uri": "https://localhost:8080/" 1008 | }, 1009 | "id": "3elrtDOus83z", 1010 | "outputId": "cb8af6dc-ed37-447a-e8a6-5aefed30211e" 1011 | }, 1012 | "outputs": [ 1013 | { 1014 | "name": "stderr", 1015 | "output_type": "stream", 1016 | "text": [ 1017 | "WARNING:google.auth._default:No project ID could be determined. Consider running `gcloud config set project` or setting the GOOGLE_CLOUD_PROJECT environment variable\n" 1018 | ] 1019 | } 1020 | ], 1021 | "source": [ 1022 | "from kfp.v2.google import client\n", 1023 | "\n", 1024 | "pipelines_client = client.AIPlatformClient(\n", 1025 | " project_id=GOOGLE_CLOUD_PROJECT, region=GOOGLE_CLOUD_REGION,\n", 1026 | ")" 1027 | ] 1028 | }, 1029 | { 1030 | "cell_type": "code", 1031 | "execution_count": 47, 1032 | "metadata": { 1033 | "colab": { 1034 | "base_uri": "https://localhost:8080/", 1035 | "height": 34 1036 | }, 1037 | "id": "TiSaBREqfa86", 1038 | "outputId": "9b0addc0-9677-4202-9ffb-128485972bc1" 1039 | }, 1040 | "outputs": [ 1041 | { 1042 | "data": { 1043 | "text/html": [ 1044 | "See the Pipeline job here." 1045 | ], 1046 | "text/plain": [ 1047 | "" 1048 | ] 1049 | }, 1050 | "metadata": {}, 1051 | "output_type": "display_data" 1052 | } 1053 | ], 1054 | "source": [ 1055 | "import json\n", 1056 | "from tfx.orchestration import data_types\n", 1057 | "\n", 1058 | "_ = pipelines_client.create_run_from_job_spec(\n", 1059 | " PIPELINE_DEFINITION_FILE,\n", 1060 | " enable_caching=False,\n", 1061 | " parameter_values={\n", 1062 | " \"input-config\": json.dumps(\n", 1063 | " {\n", 1064 | " \"splits\": [\n", 1065 | " {\"name\": \"train\", \"pattern\": \"span-[12]/train/*.tfrecord\"},\n", 1066 | " {\"name\": \"val\", \"pattern\": \"span-[12]/test/*.tfrecord\"},\n", 1067 | " ]\n", 1068 | " }\n", 1069 | " ),\n", 1070 | " \"output-config\": json.dumps({}),\n", 1071 | " },\n", 1072 | ")" 1073 | ] 1074 | } 1075 | ], 1076 | "metadata": { 1077 | "colab": { 1078 | "collapsed_sections": [], 1079 | "include_colab_link": true, 1080 | "machine_shape": "hm", 1081 | "name": "Custom_Model_TFX", 1082 | "provenance": [] 1083 | }, 1084 | "environment": { 1085 | "name": "tf2-gpu.2-4.mnightly-2021-02-02-debian-10-test", 1086 | "type": "gcloud", 1087 | "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-4:mnightly-2021-02-02-debian-10-test" 1088 | }, 1089 | "kernelspec": { 1090 | "display_name": "Python 3 (ipykernel)", 1091 | "language": "python", 1092 | "name": "python3" 1093 | }, 1094 | "language_info": { 1095 | "codemirror_mode": { 1096 | "name": "ipython", 1097 | "version": 3 1098 | }, 1099 | "file_extension": ".py", 1100 | "mimetype": "text/x-python", 1101 | "name": "python", 1102 | "nbconvert_exporter": "python", 1103 | "pygments_lexer": "ipython3", 1104 | "version": "3.8.2" 1105 | } 1106 | }, 1107 | "nbformat": 4, 1108 | "nbformat_minor": 1 1109 | } 1110 | -------------------------------------------------------------------------------- /notebooks/01_Dataset_Prep.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "view-in-github" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "id": "zyOcSJS29gkH" 17 | }, 18 | "source": [ 19 | "In this notebook, we will download [CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html) dataset 
from [TensorFlow Datasets (TFDS)](https://www.tensorflow.org/datasets). The dataset is already prepared in TFRecord format.\n", 20 | "\n", 21 | "We will push the downloaded dataset to a GCS bucket while keeping the directory structure shown below.\n", 22 | "- gs://bucket-name/span-1/train/train.tfrecord\n", 23 | "- gs://bucket-name/span-1/test/test.tfrecord\n", 24 | "\n", 25 | "To proceed with the rest of the notebook, you'll need a billing-enabled GCP account. " 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "id": "pJ2m7-bbxh4h" 32 | }, 33 | "source": [ 34 | "## Prerequisites\n", 35 | "- Add the following roles in IAM\n", 36 | "    - Storage Object Admin\n", 37 | "    - Storage Object Creator" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": { 43 | "id": "TRIz8jbQ-MUb" 44 | }, 45 | "source": [ 46 | "## Setup\n", 47 | "\n", 48 | "In order to access Google Cloud Platform from the Colab environment, we need to log in to a GCP account with the `gcloud init` command." 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 1, 54 | "metadata": { 55 | "colab": { 56 | "base_uri": "https://localhost:8080/" 57 | }, 58 | "id": "lIYdn1woOS1n", 59 | "outputId": "69c253ee-ed9a-420b-9d71-8d89c5da8217" 60 | }, 61 | "outputs": [ 62 | { 63 | "name": "stdout", 64 | "output_type": "stream", 65 | "text": [ 66 | "Welcome! This command will take you through the configuration of gcloud.\n", 67 | "\n", 68 | "Settings from your current configuration [default] are:\n", 69 | "component_manager:\n", 70 | " disable_update_check: 'True'\n", 71 | "compute:\n", 72 | " gce_metadata_read_timeout_sec: '0'\n", 73 | "\n", 74 | "Pick configuration to use:\n", 75 | " [1] Re-initialize this configuration [default] with new settings \n", 76 | " [2] Create a new configuration\n", 77 | "Please enter your numeric choice: 2\n", 78 | "\n", 79 | "Enter configuration name. Names start with a lower case letter and \n", 80 | "contain only lower case letters a-z, digits 0-9, and hyphens '-': gde\n", 81 | "Your current configuration has been set to: [gde]\n", 82 | "\n", 83 | "You can skip diagnostics next time by using the following flag:\n", 84 | " gcloud init --skip-diagnostics\n", 85 | "\n", 86 | "Network diagnostic detects and fixes local network connection issues.\n", 87 | "Reachability Check passed.\n", 88 | "Network diagnostic passed (1/1 checks passed).\n", 89 | "\n", 90 | "You must log in to continue. Would you like to log in (Y/n)? 
Y\n", 91 | "\n", 92 | "Go to the following link in your browser:\n", 93 | "\n", 94 | " https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=32555940559.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcloud-platform+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fappengine.admin+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcompute+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Faccounts.reauth&state=4OCQ4k8wDfa1lmE2eGQdWcKgfqIafI&prompt=consent&access_type=offline&code_challenge=2Mhw5bza8PZZdzFTIavmbxf4VapMEnPbNqgba2lk6kU&code_challenge_method=S256\n", 95 | "\n", 96 | "Enter verification code: 4/1AX4XfWidQVLj29O4VcpmmBw8NMEP7eVI4kNs2L76NERMizZMTHwC2vxPhec\n", 97 | "You are logged in as: [deep.diver.csp@gmail.com].\n", 98 | "\n", 99 | "Pick cloud project to use: \n", 100 | " [1] codelabs-temp\n", 101 | " [2] data-governance-tutorials\n", 102 | " [3] fast-ai-exploration\n", 103 | " [4] gcnresearch\n", 104 | " [5] gcp-ml-172005\n", 105 | " [6] gdeproj\n", 106 | " [7] gdeprojects\n", 107 | " [8] imrenagicom-support\n", 108 | " [9] jax-tpu-bfloat16\n", 109 | " [10] mobile-week-holder\n", 110 | " [11] my-dsc-solution-1\n", 111 | " [12] notional-cirrus-235403\n", 112 | " [13] sample-project-1-314222\n", 113 | " [14] satoluxx-gde\n", 114 | " [15] spreadsheet-api-sample\n", 115 | " [16] Create a new project\n", 116 | "Please enter numeric choice or text value (must exactly match list \n", 117 | "item): 5\n", 118 | "\n", 119 | "Your current project has been set to: [gcp-ml-172005].\n", 120 | "\n", 121 | "Do you want to configure a default Compute Region and Zone? (Y/n)? Y\n", 122 | "\n", 123 | "Which Google Compute Engine zone would you like to use as project \n", 124 | "default?\n", 125 | "If you do not specify a zone via a command line flag while working \n", 126 | "with Compute Engine resources, the default is assumed.\n", 127 | " [1] us-east1-b\n", 128 | " [2] us-east1-c\n", 129 | " [3] us-east1-d\n", 130 | " [4] us-east4-c\n", 131 | " [5] us-east4-b\n", 132 | " [6] us-east4-a\n", 133 | " [7] us-central1-c\n", 134 | " [8] us-central1-a\n", 135 | " [9] us-central1-f\n", 136 | " [10] us-central1-b\n", 137 | " [11] us-west1-b\n", 138 | " [12] us-west1-c\n", 139 | " [13] us-west1-a\n", 140 | " [14] europe-west4-a\n", 141 | " [15] europe-west4-b\n", 142 | " [16] europe-west4-c\n", 143 | " [17] europe-west1-b\n", 144 | " [18] europe-west1-d\n", 145 | " [19] europe-west1-c\n", 146 | " [20] europe-west3-c\n", 147 | " [21] europe-west3-a\n", 148 | " [22] europe-west3-b\n", 149 | " [23] europe-west2-c\n", 150 | " [24] europe-west2-b\n", 151 | " [25] europe-west2-a\n", 152 | " [26] asia-east1-b\n", 153 | " [27] asia-east1-a\n", 154 | " [28] asia-east1-c\n", 155 | " [29] asia-southeast1-b\n", 156 | " [30] asia-southeast1-a\n", 157 | " [31] asia-southeast1-c\n", 158 | " [32] asia-northeast1-b\n", 159 | " [33] asia-northeast1-c\n", 160 | " [34] asia-northeast1-a\n", 161 | " [35] asia-south1-c\n", 162 | " [36] asia-south1-b\n", 163 | " [37] asia-south1-a\n", 164 | " [38] australia-southeast1-b\n", 165 | " [39] australia-southeast1-c\n", 166 | " [40] australia-southeast1-a\n", 167 | " [41] southamerica-east1-b\n", 168 | " [42] southamerica-east1-c\n", 169 | " [43] southamerica-east1-a\n", 170 | " [44] asia-east2-a\n", 171 | " [45] asia-east2-b\n", 172 | " [46] asia-east2-c\n", 173 | " [47] asia-northeast2-a\n", 174 | " [48] asia-northeast2-b\n", 175 | " [49] 
asia-northeast2-c\n", 176 | " [50] asia-northeast3-a\n", 177 | "Did not print [36] options.\n", 178 | "Too many options [86]. Enter \"list\" at prompt to print choices fully.\n", 179 | "Please enter numeric choice or text value (must exactly match list \n", 180 | "item): 8\n", 181 | "\n", 182 | "Your project default Compute Engine zone has been set to [us-central1-a].\n", 183 | "You can change it by running [gcloud config set compute/zone NAME].\n", 184 | "\n", 185 | "Your project default Compute Engine region has been set to [us-central1].\n", 186 | "You can change it by running [gcloud config set compute/region NAME].\n", 187 | "\n", 188 | "Your Google Cloud SDK is configured and ready to use!\n", 189 | "\n", 190 | "* Commands that require authentication will use deep.diver.csp@gmail.com by default\n", 191 | "* Commands will reference project `gcp-ml-172005` by default\n", 192 | "* Compute Engine commands will use region `us-central1` by default\n", 193 | "* Compute Engine commands will use zone `us-central1-a` by default\n", 194 | "\n", 195 | "Run `gcloud help config` to learn how to change individual settings\n", 196 | "\n", 197 | "This gcloud configuration is called [gde]. You can create additional configurations if you work with multiple accounts and/or projects.\n", 198 | "Run `gcloud topic configurations` to learn more.\n", 199 | "\n", 200 | "Some things to try next:\n", 201 | "\n", 202 | "* Run `gcloud --help` to see the Cloud Platform services you can interact with. And run `gcloud help COMMAND` to get help on any gcloud command.\n", 203 | "* Run `gcloud topic --help` to learn about advanced features of the SDK like arg files and output formatting\n" 204 | ] 205 | } 206 | ], 207 | "source": [ 208 | "!gcloud init" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 2, 214 | "metadata": { 215 | "id": "mhDIpjq3yuGV" 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "from google.colab import auth\n", 220 | "\n", 221 | "auth.authenticate_user()" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": { 227 | "id": "bG-NwjjB-ioI" 228 | }, 229 | "source": [ 230 | "## Download the original dataset and copy it over to a GCS bucket" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": { 236 | "id": "FXvVVA_joPBL" 237 | }, 238 | "source": [ 239 | "### 1. Create Directories\n", 240 | "\n", 241 | "In this step, we are going to create directories to hold the TFRecord dataset to be downloaded. As an initial phase, the training and testing datasets will be stored in `span-1/train` and `span-1/test` directories, respectively.\n", 242 | "\n", 243 | "When more data with the same distribution arrives, we can update the currently stored dataset. In this case, you should turn on [GCS's versioning feature](https://cloud.google.com/storage/docs/object-versioning).\n", 244 | "\n", 245 | "When more data with a different distribution arrives, we will create new directories, `span-2/train` and `span-2/test`, to address data drift. In this way, we can keep data separate for easier maintenance while handling versioning separately for different `SPAN`s.\n",
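"\n",
"For example, after a second data drop, the bucket layout would look like below (the `span-2` entries are illustrative):\n",
"- gs://bucket-name/span-1/train/train.tfrecord\n",
"- gs://bucket-name/span-1/test/test.tfrecord\n",
"- gs://bucket-name/span-2/train/train.tfrecord\n",
"- gs://bucket-name/span-2/test/test.tfrecord\n",
"\n",
"TFX can then resolve a given span with a `span-{SPAN}/...` input pattern, as demonstrated at the end of this notebook."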
246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 3, 251 | "metadata": { 252 | "id": "XaAx0ZJ2QsGp" 253 | }, 254 | "outputs": [], 255 | "source": [ 256 | "TARGET_ROOT_DIR = \"cifar10\"\n", 257 | "TARGET_TRAIN_DIR = TARGET_ROOT_DIR + \"/span-1/train\"\n", 258 | "TARGET_TEST_DIR = TARGET_ROOT_DIR + \"/span-1/test\"\n", 259 | "\n", 260 | "!mkdir -p {TARGET_TRAIN_DIR}\n", 261 | "!mkdir -p {TARGET_TEST_DIR}" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": { 267 | "id": "qt5er4ywpfGv" 268 | }, 269 | "source": [ 270 | "### 2. Download CIFAR10 Dataset with TFDS" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 4, 276 | "metadata": { 277 | "colab": { 278 | "base_uri": "https://localhost:8080/", 279 | "height": 296, 280 | "referenced_widgets": [ 281 | "fb0e11738e734dc6a7428c4a82e81705", 282 | "4f7c45939d3f49eb856ccc58f999e37b", 283 | "a327863b15a54d1eb8cd2c26ee31c826", 284 | "d46fa732fb474fce8f940c031ca06637", 285 | "fcf94990ac4f46b2a0fd54e06eb0f88b", 286 | "1a9b33aebfb444a9b3d9442a5d72cea2", 287 | "710373ad31fe4dc0b29c7c681e345cc8", 288 | "dbe66bad8ba94b30b93c7c9d25cf8fbd", 289 | "9613736bf85c44338d6f5609e6efc90b", 290 | "385a10a4ca644ca38db0f539be5a54d5", 291 | "ecefe53c759e4a54aa1f3ed44444fdf5", 292 | "4d9642439f084654b63b4e7bde24b89c", 293 | "fd5c76c7cda94b22814409cbda62a38d", 294 | "158453b7af414664b766d99ecbb9429e", 295 | "6bf37bc7f5684a8eb3dcf625d02616fd", 296 | "a7265046f01343a0a9de7635e60e2d5f", 297 | "b7e8c1bf6e394e61aae415a735b18ffe", 298 | "c8c9446c1a2f4a3785600ab596a451c9", 299 | "a9e7a7b675844f1ab616bb1972c15d32", 300 | "20d532a52ecd479fb7dfa9342decd2d1", 301 | "0b02c308f2604dd28dd5b430d6239253", 302 | "b515ef8f06744ce486f110326909e90e", 303 | "af4562715ddd41e786e717033a6f3fec", 304 | "5b27a0a8a6964be49befadfc9d209c93", 305 | "d0c79d32309640a18b1097e50c273489", 306 | "2f93c4391a204a26a8179b0a6b46a23e", 307 | "4fdde132c9744c74bbf7aa7a3185ae64", 308 | "6f08665adc814ac09e7cb21e0533866c", 309 | "fd5460933db44c92b60e41b065551215", 310 | "e3c232f427c649f2bf3f58ed97ad15d1", 311 | "29aa47ab203d4099aad31f79c72421f4", 312 | "5a452ead65af499bbf868b5314dd7196", 313 | "9b8d5063647d466684c6e1b196a90796" 314 | ] 315 | }, 316 | "id": "dWUoW87xtuQ8", 317 | "outputId": "a6e61183-396a-4912-8f01-94f82185da15" 318 | }, 319 | "outputs": [ 320 | { 321 | "name": "stdout", 322 | "output_type": "stream", 323 | "text": [ 324 | "\u001b[1mDownloading and preparing dataset cifar10/3.0.2 (download: 162.17 MiB, generated: 132.40 MiB, total: 294.58 MiB) to /root/tensorflow_datasets/cifar10/3.0.2...\u001b[0m\n" 325 | ] 326 | }, 327 | { 328 | "data": { 329 | "application/vnd.jupyter.widget-view+json": { 330 | "model_id": "fb0e11738e734dc6a7428c4a82e81705", 331 | "version_major": 2, 332 | "version_minor": 0 333 | }, 334 | "text/plain": [ 335 | "Dl Completed...: 0 url [00:00, ? url/s]" 336 | ] 337 | }, 338 | "metadata": {}, 339 | "output_type": "display_data" 340 | }, 341 | { 342 | "data": { 343 | "application/vnd.jupyter.widget-view+json": { 344 | "model_id": "4d9642439f084654b63b4e7bde24b89c", 345 | "version_major": 2, 346 | "version_minor": 0 347 | }, 348 | "text/plain": [ 349 | "Dl Size...: 0 MiB [00:00, ? 
MiB/s]" 350 | ] 351 | }, 352 | "metadata": {}, 353 | "output_type": "display_data" 354 | }, 355 | { 356 | "data": { 357 | "application/vnd.jupyter.widget-view+json": { 358 | "model_id": "af4562715ddd41e786e717033a6f3fec", 359 | "version_major": 2, 360 | "version_minor": 0 361 | }, 362 | "text/plain": [ 363 | "Extraction completed...: 0 file [00:00, ? file/s]" 364 | ] 365 | }, 366 | "metadata": {}, 367 | "output_type": "display_data" 368 | }, 369 | { 370 | "name": "stdout", 371 | "output_type": "stream", 372 | "text": [ 373 | "\n", 374 | "\n", 375 | "\n" 376 | ] 377 | }, 378 | { 379 | "data": { 380 | "application/vnd.jupyter.widget-view+json": { 381 | "model_id": "26d3cc1e67244584878097dd9b080a17", 382 | "version_major": 2, 383 | "version_minor": 0 384 | }, 385 | "text/plain": [ 386 | "0 examples [00:00, ? examples/s]" 387 | ] 388 | }, 389 | "metadata": {}, 390 | "output_type": "display_data" 391 | }, 392 | { 393 | "name": "stdout", 394 | "output_type": "stream", 395 | "text": [ 396 | "Shuffling and writing examples to /root/tensorflow_datasets/cifar10/3.0.2.incompleteYSSAF4/cifar10-train.tfrecord\n" 397 | ] 398 | }, 399 | { 400 | "data": { 401 | "application/vnd.jupyter.widget-view+json": { 402 | "model_id": "c8243d1036a6458881205806c1e98661", 403 | "version_major": 2, 404 | "version_minor": 0 405 | }, 406 | "text/plain": [ 407 | " 0%| | 0/50000 [00:00 Please note this section only works within GCP Vertex Notebook environment due to the authentication issue. If you know how to setup GCS access privilege for TFX, please let me know." 610 | ] 611 | }, 612 | { 613 | "cell_type": "code", 614 | "execution_count": null, 615 | "metadata": { 616 | "id": "Dw8IvNVRllqI" 617 | }, 618 | "outputs": [], 619 | "source": [ 620 | "!pip install tfx==1.2.0" 621 | ] 622 | }, 623 | { 624 | "cell_type": "code", 625 | "execution_count": null, 626 | "metadata": { 627 | "id": "Xi8xgC8wZqVD" 628 | }, 629 | "outputs": [], 630 | "source": [ 631 | "from tfx import v1 as tfx\n", 632 | "from tfx.components.example_gen import utils" 633 | ] 634 | }, 635 | { 636 | "cell_type": "code", 637 | "execution_count": null, 638 | "metadata": { 639 | "id": "Ma2QzXrEZuVX" 640 | }, 641 | "outputs": [], 642 | "source": [ 643 | "from tfx.proto import example_gen_pb2\n", 644 | "\n", 645 | "_DATA_PATH = \"gs://cifar10-csp-public\"\n", 646 | "\n", 647 | "splits = [\n", 648 | " example_gen_pb2.Input.Split(name=\"train\", pattern=\"span-{SPAN}/train/*\"),\n", 649 | " example_gen_pb2.Input.Split(name=\"val\", pattern=\"span-{SPAN}/test/*\"),\n", 650 | "]\n", 651 | "\n", 652 | "_, span, version = utils.calculate_splits_fingerprint_span_and_version(\n", 653 | " _DATA_PATH, splits\n", 654 | ")" 655 | ] 656 | }, 657 | { 658 | "cell_type": "code", 659 | "execution_count": null, 660 | "metadata": { 661 | "colab": { 662 | "base_uri": "https://localhost:8080/" 663 | }, 664 | "id": "UZFygbeUaccg", 665 | "outputId": "06a298da-7509-48c8-9ee9-58c3dc320f5b" 666 | }, 667 | "outputs": [ 668 | { 669 | "data": { 670 | "text/plain": [ 671 | "(1, None)" 672 | ] 673 | }, 674 | "execution_count": 8, 675 | "metadata": {}, 676 | "output_type": "execute_result" 677 | } 678 | ], 679 | "source": [ 680 | "span, version" 681 | ] 682 | } 683 | ], 684 | "metadata": { 685 | "colab": { 686 | "include_colab_link": true, 687 | "name": "Dataset_Prep", 688 | "provenance": [] 689 | }, 690 | "kernelspec": { 691 | "display_name": "Python 3 (ipykernel)", 692 | "language": "python", 693 | "name": "python3" 694 | }, 695 | "language_info": { 696 | "codemirror_mode": { 697 | "name": 
"ipython", 698 | "version": 3 699 | }, 700 | "file_extension": ".py", 701 | "mimetype": "text/x-python", 702 | "name": "python", 703 | "nbconvert_exporter": "python", 704 | "pygments_lexer": "ipython3", 705 | "version": "3.8.2" 706 | }, 707 | "widgets": { 708 | "application/vnd.jupyter.widget-state+json": { 709 | "0b02c308f2604dd28dd5b430d6239253": { 710 | "model_module": "@jupyter-widgets/controls", 711 | "model_module_version": "1.5.0", 712 | "model_name": "DescriptionStyleModel", 713 | "state": { 714 | "_model_module": "@jupyter-widgets/controls", 715 | "_model_module_version": "1.5.0", 716 | "_model_name": "DescriptionStyleModel", 717 | "_view_count": null, 718 | "_view_module": "@jupyter-widgets/base", 719 | "_view_module_version": "1.2.0", 720 | "_view_name": "StyleView", 721 | "description_width": "" 722 | } 723 | }, 724 | "158453b7af414664b766d99ecbb9429e": { 725 | "model_module": "@jupyter-widgets/controls", 726 | "model_module_version": "1.5.0", 727 | "model_name": "HTMLModel", 728 | "state": { 729 | "_dom_classes": [], 730 | "_model_module": "@jupyter-widgets/controls", 731 | "_model_module_version": "1.5.0", 732 | "_model_name": "HTMLModel", 733 | "_view_count": null, 734 | "_view_module": "@jupyter-widgets/controls", 735 | "_view_module_version": "1.5.0", 736 | "_view_name": "HTMLView", 737 | "description": "", 738 | "description_tooltip": null, 739 | "layout": "IPY_MODEL_c8c9446c1a2f4a3785600ab596a451c9", 740 | "placeholder": "​", 741 | "style": "IPY_MODEL_b7e8c1bf6e394e61aae415a735b18ffe", 742 | "value": "Dl Size...: 100%" 743 | } 744 | }, 745 | "1a9b33aebfb444a9b3d9442a5d72cea2": { 746 | "model_module": "@jupyter-widgets/controls", 747 | "model_module_version": "1.5.0", 748 | "model_name": "DescriptionStyleModel", 749 | "state": { 750 | "_model_module": "@jupyter-widgets/controls", 751 | "_model_module_version": "1.5.0", 752 | "_model_name": "DescriptionStyleModel", 753 | "_view_count": null, 754 | "_view_module": "@jupyter-widgets/base", 755 | "_view_module_version": "1.2.0", 756 | "_view_name": "StyleView", 757 | "description_width": "" 758 | } 759 | }, 760 | "20d532a52ecd479fb7dfa9342decd2d1": { 761 | "model_module": "@jupyter-widgets/base", 762 | "model_module_version": "1.2.0", 763 | "model_name": "LayoutModel", 764 | "state": { 765 | "_model_module": "@jupyter-widgets/base", 766 | "_model_module_version": "1.2.0", 767 | "_model_name": "LayoutModel", 768 | "_view_count": null, 769 | "_view_module": "@jupyter-widgets/base", 770 | "_view_module_version": "1.2.0", 771 | "_view_name": "LayoutView", 772 | "align_content": null, 773 | "align_items": null, 774 | "align_self": null, 775 | "border": null, 776 | "bottom": null, 777 | "display": null, 778 | "flex": null, 779 | "flex_flow": null, 780 | "grid_area": null, 781 | "grid_auto_columns": null, 782 | "grid_auto_flow": null, 783 | "grid_auto_rows": null, 784 | "grid_column": null, 785 | "grid_gap": null, 786 | "grid_row": null, 787 | "grid_template_areas": null, 788 | "grid_template_columns": null, 789 | "grid_template_rows": null, 790 | "height": null, 791 | "justify_content": null, 792 | "justify_items": null, 793 | "left": null, 794 | "margin": null, 795 | "max_height": null, 796 | "max_width": null, 797 | "min_height": null, 798 | "min_width": null, 799 | "object_fit": null, 800 | "object_position": null, 801 | "order": null, 802 | "overflow": null, 803 | "overflow_x": null, 804 | "overflow_y": null, 805 | "padding": null, 806 | "right": null, 807 | "top": null, 808 | "visibility": null, 809 | "width": "20px" 810 | } 
811 | }, 812 | "29aa47ab203d4099aad31f79c72421f4": { 813 | "model_module": "@jupyter-widgets/base", 814 | "model_module_version": "1.2.0", 815 | "model_name": "LayoutModel", 816 | "state": { 817 | "_model_module": "@jupyter-widgets/base", 818 | "_model_module_version": "1.2.0", 819 | "_model_name": "LayoutModel", 820 | "_view_count": null, 821 | "_view_module": "@jupyter-widgets/base", 822 | "_view_module_version": "1.2.0", 823 | "_view_name": "LayoutView", 824 | "align_content": null, 825 | "align_items": null, 826 | "align_self": null, 827 | "border": null, 828 | "bottom": null, 829 | "display": null, 830 | "flex": null, 831 | "flex_flow": null, 832 | "grid_area": null, 833 | "grid_auto_columns": null, 834 | "grid_auto_flow": null, 835 | "grid_auto_rows": null, 836 | "grid_column": null, 837 | "grid_gap": null, 838 | "grid_row": null, 839 | "grid_template_areas": null, 840 | "grid_template_columns": null, 841 | "grid_template_rows": null, 842 | "height": null, 843 | "justify_content": null, 844 | "justify_items": null, 845 | "left": null, 846 | "margin": null, 847 | "max_height": null, 848 | "max_width": null, 849 | "min_height": null, 850 | "min_width": null, 851 | "object_fit": null, 852 | "object_position": null, 853 | "order": null, 854 | "overflow": null, 855 | "overflow_x": null, 856 | "overflow_y": null, 857 | "padding": null, 858 | "right": null, 859 | "top": null, 860 | "visibility": null, 861 | "width": "20px" 862 | } 863 | }, 864 | "2f93c4391a204a26a8179b0a6b46a23e": { 865 | "model_module": "@jupyter-widgets/controls", 866 | "model_module_version": "1.5.0", 867 | "model_name": "FloatProgressModel", 868 | "state": { 869 | "_dom_classes": [], 870 | "_model_module": "@jupyter-widgets/controls", 871 | "_model_module_version": "1.5.0", 872 | "_model_name": "FloatProgressModel", 873 | "_view_count": null, 874 | "_view_module": "@jupyter-widgets/controls", 875 | "_view_module_version": "1.5.0", 876 | "_view_name": "ProgressView", 877 | "bar_style": "success", 878 | "description": "", 879 | "description_tooltip": null, 880 | "layout": "IPY_MODEL_29aa47ab203d4099aad31f79c72421f4", 881 | "max": 1, 882 | "min": 0, 883 | "orientation": "horizontal", 884 | "style": "IPY_MODEL_e3c232f427c649f2bf3f58ed97ad15d1", 885 | "value": 1 886 | } 887 | }, 888 | "385a10a4ca644ca38db0f539be5a54d5": { 889 | "model_module": "@jupyter-widgets/controls", 890 | "model_module_version": "1.5.0", 891 | "model_name": "DescriptionStyleModel", 892 | "state": { 893 | "_model_module": "@jupyter-widgets/controls", 894 | "_model_module_version": "1.5.0", 895 | "_model_name": "DescriptionStyleModel", 896 | "_view_count": null, 897 | "_view_module": "@jupyter-widgets/base", 898 | "_view_module_version": "1.2.0", 899 | "_view_name": "StyleView", 900 | "description_width": "" 901 | } 902 | }, 903 | "4d9642439f084654b63b4e7bde24b89c": { 904 | "model_module": "@jupyter-widgets/controls", 905 | "model_module_version": "1.5.0", 906 | "model_name": "HBoxModel", 907 | "state": { 908 | "_dom_classes": [], 909 | "_model_module": "@jupyter-widgets/controls", 910 | "_model_module_version": "1.5.0", 911 | "_model_name": "HBoxModel", 912 | "_view_count": null, 913 | "_view_module": "@jupyter-widgets/controls", 914 | "_view_module_version": "1.5.0", 915 | "_view_name": "HBoxView", 916 | "box_style": "", 917 | "children": [ 918 | "IPY_MODEL_158453b7af414664b766d99ecbb9429e", 919 | "IPY_MODEL_6bf37bc7f5684a8eb3dcf625d02616fd", 920 | "IPY_MODEL_a7265046f01343a0a9de7635e60e2d5f" 921 | ], 922 | "layout": 
"IPY_MODEL_fd5c76c7cda94b22814409cbda62a38d" 923 | } 924 | }, 925 | "4f7c45939d3f49eb856ccc58f999e37b": { 926 | "model_module": "@jupyter-widgets/base", 927 | "model_module_version": "1.2.0", 928 | "model_name": "LayoutModel", 929 | "state": { 930 | "_model_module": "@jupyter-widgets/base", 931 | "_model_module_version": "1.2.0", 932 | "_model_name": "LayoutModel", 933 | "_view_count": null, 934 | "_view_module": "@jupyter-widgets/base", 935 | "_view_module_version": "1.2.0", 936 | "_view_name": "LayoutView", 937 | "align_content": null, 938 | "align_items": null, 939 | "align_self": null, 940 | "border": null, 941 | "bottom": null, 942 | "display": null, 943 | "flex": null, 944 | "flex_flow": null, 945 | "grid_area": null, 946 | "grid_auto_columns": null, 947 | "grid_auto_flow": null, 948 | "grid_auto_rows": null, 949 | "grid_column": null, 950 | "grid_gap": null, 951 | "grid_row": null, 952 | "grid_template_areas": null, 953 | "grid_template_columns": null, 954 | "grid_template_rows": null, 955 | "height": null, 956 | "justify_content": null, 957 | "justify_items": null, 958 | "left": null, 959 | "margin": null, 960 | "max_height": null, 961 | "max_width": null, 962 | "min_height": null, 963 | "min_width": null, 964 | "object_fit": null, 965 | "object_position": null, 966 | "order": null, 967 | "overflow": null, 968 | "overflow_x": null, 969 | "overflow_y": null, 970 | "padding": null, 971 | "right": null, 972 | "top": null, 973 | "visibility": null, 974 | "width": null 975 | } 976 | }, 977 | "4fdde132c9744c74bbf7aa7a3185ae64": { 978 | "model_module": "@jupyter-widgets/controls", 979 | "model_module_version": "1.5.0", 980 | "model_name": "HTMLModel", 981 | "state": { 982 | "_dom_classes": [], 983 | "_model_module": "@jupyter-widgets/controls", 984 | "_model_module_version": "1.5.0", 985 | "_model_name": "HTMLModel", 986 | "_view_count": null, 987 | "_view_module": "@jupyter-widgets/controls", 988 | "_view_module_version": "1.5.0", 989 | "_view_name": "HTMLView", 990 | "description": "", 991 | "description_tooltip": null, 992 | "layout": "IPY_MODEL_9b8d5063647d466684c6e1b196a90796", 993 | "placeholder": "​", 994 | "style": "IPY_MODEL_5a452ead65af499bbf868b5314dd7196", 995 | "value": " 1/1 [00:14<00:00, 14.94s/ file]" 996 | } 997 | }, 998 | "5a452ead65af499bbf868b5314dd7196": { 999 | "model_module": "@jupyter-widgets/controls", 1000 | "model_module_version": "1.5.0", 1001 | "model_name": "DescriptionStyleModel", 1002 | "state": { 1003 | "_model_module": "@jupyter-widgets/controls", 1004 | "_model_module_version": "1.5.0", 1005 | "_model_name": "DescriptionStyleModel", 1006 | "_view_count": null, 1007 | "_view_module": "@jupyter-widgets/base", 1008 | "_view_module_version": "1.2.0", 1009 | "_view_name": "StyleView", 1010 | "description_width": "" 1011 | } 1012 | }, 1013 | "5b27a0a8a6964be49befadfc9d209c93": { 1014 | "model_module": "@jupyter-widgets/base", 1015 | "model_module_version": "1.2.0", 1016 | "model_name": "LayoutModel", 1017 | "state": { 1018 | "_model_module": "@jupyter-widgets/base", 1019 | "_model_module_version": "1.2.0", 1020 | "_model_name": "LayoutModel", 1021 | "_view_count": null, 1022 | "_view_module": "@jupyter-widgets/base", 1023 | "_view_module_version": "1.2.0", 1024 | "_view_name": "LayoutView", 1025 | "align_content": null, 1026 | "align_items": null, 1027 | "align_self": null, 1028 | "border": null, 1029 | "bottom": null, 1030 | "display": null, 1031 | "flex": null, 1032 | "flex_flow": null, 1033 | "grid_area": null, 1034 | "grid_auto_columns": null, 1035 | 
"grid_auto_flow": null, 1036 | "grid_auto_rows": null, 1037 | "grid_column": null, 1038 | "grid_gap": null, 1039 | "grid_row": null, 1040 | "grid_template_areas": null, 1041 | "grid_template_columns": null, 1042 | "grid_template_rows": null, 1043 | "height": null, 1044 | "justify_content": null, 1045 | "justify_items": null, 1046 | "left": null, 1047 | "margin": null, 1048 | "max_height": null, 1049 | "max_width": null, 1050 | "min_height": null, 1051 | "min_width": null, 1052 | "object_fit": null, 1053 | "object_position": null, 1054 | "order": null, 1055 | "overflow": null, 1056 | "overflow_x": null, 1057 | "overflow_y": null, 1058 | "padding": null, 1059 | "right": null, 1060 | "top": null, 1061 | "visibility": null, 1062 | "width": null 1063 | } 1064 | }, 1065 | "6bf37bc7f5684a8eb3dcf625d02616fd": { 1066 | "model_module": "@jupyter-widgets/controls", 1067 | "model_module_version": "1.5.0", 1068 | "model_name": "FloatProgressModel", 1069 | "state": { 1070 | "_dom_classes": [], 1071 | "_model_module": "@jupyter-widgets/controls", 1072 | "_model_module_version": "1.5.0", 1073 | "_model_name": "FloatProgressModel", 1074 | "_view_count": null, 1075 | "_view_module": "@jupyter-widgets/controls", 1076 | "_view_module_version": "1.5.0", 1077 | "_view_name": "ProgressView", 1078 | "bar_style": "success", 1079 | "description": "", 1080 | "description_tooltip": null, 1081 | "layout": "IPY_MODEL_20d532a52ecd479fb7dfa9342decd2d1", 1082 | "max": 1, 1083 | "min": 0, 1084 | "orientation": "horizontal", 1085 | "style": "IPY_MODEL_a9e7a7b675844f1ab616bb1972c15d32", 1086 | "value": 1 1087 | } 1088 | }, 1089 | "6f08665adc814ac09e7cb21e0533866c": { 1090 | "model_module": "@jupyter-widgets/controls", 1091 | "model_module_version": "1.5.0", 1092 | "model_name": "DescriptionStyleModel", 1093 | "state": { 1094 | "_model_module": "@jupyter-widgets/controls", 1095 | "_model_module_version": "1.5.0", 1096 | "_model_name": "DescriptionStyleModel", 1097 | "_view_count": null, 1098 | "_view_module": "@jupyter-widgets/base", 1099 | "_view_module_version": "1.2.0", 1100 | "_view_name": "StyleView", 1101 | "description_width": "" 1102 | } 1103 | }, 1104 | "710373ad31fe4dc0b29c7c681e345cc8": { 1105 | "model_module": "@jupyter-widgets/base", 1106 | "model_module_version": "1.2.0", 1107 | "model_name": "LayoutModel", 1108 | "state": { 1109 | "_model_module": "@jupyter-widgets/base", 1110 | "_model_module_version": "1.2.0", 1111 | "_model_name": "LayoutModel", 1112 | "_view_count": null, 1113 | "_view_module": "@jupyter-widgets/base", 1114 | "_view_module_version": "1.2.0", 1115 | "_view_name": "LayoutView", 1116 | "align_content": null, 1117 | "align_items": null, 1118 | "align_self": null, 1119 | "border": null, 1120 | "bottom": null, 1121 | "display": null, 1122 | "flex": null, 1123 | "flex_flow": null, 1124 | "grid_area": null, 1125 | "grid_auto_columns": null, 1126 | "grid_auto_flow": null, 1127 | "grid_auto_rows": null, 1128 | "grid_column": null, 1129 | "grid_gap": null, 1130 | "grid_row": null, 1131 | "grid_template_areas": null, 1132 | "grid_template_columns": null, 1133 | "grid_template_rows": null, 1134 | "height": null, 1135 | "justify_content": null, 1136 | "justify_items": null, 1137 | "left": null, 1138 | "margin": null, 1139 | "max_height": null, 1140 | "max_width": null, 1141 | "min_height": null, 1142 | "min_width": null, 1143 | "object_fit": null, 1144 | "object_position": null, 1145 | "order": null, 1146 | "overflow": null, 1147 | "overflow_x": null, 1148 | "overflow_y": null, 1149 | "padding": null, 
1150 | "right": null, 1151 | "top": null, 1152 | "visibility": null, 1153 | "width": null 1154 | } 1155 | }, 1156 | "9613736bf85c44338d6f5609e6efc90b": { 1157 | "model_module": "@jupyter-widgets/base", 1158 | "model_module_version": "1.2.0", 1159 | "model_name": "LayoutModel", 1160 | "state": { 1161 | "_model_module": "@jupyter-widgets/base", 1162 | "_model_module_version": "1.2.0", 1163 | "_model_name": "LayoutModel", 1164 | "_view_count": null, 1165 | "_view_module": "@jupyter-widgets/base", 1166 | "_view_module_version": "1.2.0", 1167 | "_view_name": "LayoutView", 1168 | "align_content": null, 1169 | "align_items": null, 1170 | "align_self": null, 1171 | "border": null, 1172 | "bottom": null, 1173 | "display": null, 1174 | "flex": null, 1175 | "flex_flow": null, 1176 | "grid_area": null, 1177 | "grid_auto_columns": null, 1178 | "grid_auto_flow": null, 1179 | "grid_auto_rows": null, 1180 | "grid_column": null, 1181 | "grid_gap": null, 1182 | "grid_row": null, 1183 | "grid_template_areas": null, 1184 | "grid_template_columns": null, 1185 | "grid_template_rows": null, 1186 | "height": null, 1187 | "justify_content": null, 1188 | "justify_items": null, 1189 | "left": null, 1190 | "margin": null, 1191 | "max_height": null, 1192 | "max_width": null, 1193 | "min_height": null, 1194 | "min_width": null, 1195 | "object_fit": null, 1196 | "object_position": null, 1197 | "order": null, 1198 | "overflow": null, 1199 | "overflow_x": null, 1200 | "overflow_y": null, 1201 | "padding": null, 1202 | "right": null, 1203 | "top": null, 1204 | "visibility": null, 1205 | "width": "20px" 1206 | } 1207 | }, 1208 | "9b8d5063647d466684c6e1b196a90796": { 1209 | "model_module": "@jupyter-widgets/base", 1210 | "model_module_version": "1.2.0", 1211 | "model_name": "LayoutModel", 1212 | "state": { 1213 | "_model_module": "@jupyter-widgets/base", 1214 | "_model_module_version": "1.2.0", 1215 | "_model_name": "LayoutModel", 1216 | "_view_count": null, 1217 | "_view_module": "@jupyter-widgets/base", 1218 | "_view_module_version": "1.2.0", 1219 | "_view_name": "LayoutView", 1220 | "align_content": null, 1221 | "align_items": null, 1222 | "align_self": null, 1223 | "border": null, 1224 | "bottom": null, 1225 | "display": null, 1226 | "flex": null, 1227 | "flex_flow": null, 1228 | "grid_area": null, 1229 | "grid_auto_columns": null, 1230 | "grid_auto_flow": null, 1231 | "grid_auto_rows": null, 1232 | "grid_column": null, 1233 | "grid_gap": null, 1234 | "grid_row": null, 1235 | "grid_template_areas": null, 1236 | "grid_template_columns": null, 1237 | "grid_template_rows": null, 1238 | "height": null, 1239 | "justify_content": null, 1240 | "justify_items": null, 1241 | "left": null, 1242 | "margin": null, 1243 | "max_height": null, 1244 | "max_width": null, 1245 | "min_height": null, 1246 | "min_width": null, 1247 | "object_fit": null, 1248 | "object_position": null, 1249 | "order": null, 1250 | "overflow": null, 1251 | "overflow_x": null, 1252 | "overflow_y": null, 1253 | "padding": null, 1254 | "right": null, 1255 | "top": null, 1256 | "visibility": null, 1257 | "width": null 1258 | } 1259 | }, 1260 | "a327863b15a54d1eb8cd2c26ee31c826": { 1261 | "model_module": "@jupyter-widgets/controls", 1262 | "model_module_version": "1.5.0", 1263 | "model_name": "HTMLModel", 1264 | "state": { 1265 | "_dom_classes": [], 1266 | "_model_module": "@jupyter-widgets/controls", 1267 | "_model_module_version": "1.5.0", 1268 | "_model_name": "HTMLModel", 1269 | "_view_count": null, 1270 | "_view_module": "@jupyter-widgets/controls", 1271 | 
"_view_module_version": "1.5.0", 1272 | "_view_name": "HTMLView", 1273 | "description": "", 1274 | "description_tooltip": null, 1275 | "layout": "IPY_MODEL_710373ad31fe4dc0b29c7c681e345cc8", 1276 | "placeholder": "​", 1277 | "style": "IPY_MODEL_1a9b33aebfb444a9b3d9442a5d72cea2", 1278 | "value": "Dl Completed...: 100%" 1279 | } 1280 | }, 1281 | "a7265046f01343a0a9de7635e60e2d5f": { 1282 | "model_module": "@jupyter-widgets/controls", 1283 | "model_module_version": "1.5.0", 1284 | "model_name": "HTMLModel", 1285 | "state": { 1286 | "_dom_classes": [], 1287 | "_model_module": "@jupyter-widgets/controls", 1288 | "_model_module_version": "1.5.0", 1289 | "_model_name": "HTMLModel", 1290 | "_view_count": null, 1291 | "_view_module": "@jupyter-widgets/controls", 1292 | "_view_module_version": "1.5.0", 1293 | "_view_name": "HTMLView", 1294 | "description": "", 1295 | "description_tooltip": null, 1296 | "layout": "IPY_MODEL_b515ef8f06744ce486f110326909e90e", 1297 | "placeholder": "​", 1298 | "style": "IPY_MODEL_0b02c308f2604dd28dd5b430d6239253", 1299 | "value": " 162/162 [00:14<00:00, 15.22 MiB/s]" 1300 | } 1301 | }, 1302 | "a9e7a7b675844f1ab616bb1972c15d32": { 1303 | "model_module": "@jupyter-widgets/controls", 1304 | "model_module_version": "1.5.0", 1305 | "model_name": "ProgressStyleModel", 1306 | "state": { 1307 | "_model_module": "@jupyter-widgets/controls", 1308 | "_model_module_version": "1.5.0", 1309 | "_model_name": "ProgressStyleModel", 1310 | "_view_count": null, 1311 | "_view_module": "@jupyter-widgets/base", 1312 | "_view_module_version": "1.2.0", 1313 | "_view_name": "StyleView", 1314 | "bar_color": null, 1315 | "description_width": "" 1316 | } 1317 | }, 1318 | "af4562715ddd41e786e717033a6f3fec": { 1319 | "model_module": "@jupyter-widgets/controls", 1320 | "model_module_version": "1.5.0", 1321 | "model_name": "HBoxModel", 1322 | "state": { 1323 | "_dom_classes": [], 1324 | "_model_module": "@jupyter-widgets/controls", 1325 | "_model_module_version": "1.5.0", 1326 | "_model_name": "HBoxModel", 1327 | "_view_count": null, 1328 | "_view_module": "@jupyter-widgets/controls", 1329 | "_view_module_version": "1.5.0", 1330 | "_view_name": "HBoxView", 1331 | "box_style": "", 1332 | "children": [ 1333 | "IPY_MODEL_d0c79d32309640a18b1097e50c273489", 1334 | "IPY_MODEL_2f93c4391a204a26a8179b0a6b46a23e", 1335 | "IPY_MODEL_4fdde132c9744c74bbf7aa7a3185ae64" 1336 | ], 1337 | "layout": "IPY_MODEL_5b27a0a8a6964be49befadfc9d209c93" 1338 | } 1339 | }, 1340 | "b515ef8f06744ce486f110326909e90e": { 1341 | "model_module": "@jupyter-widgets/base", 1342 | "model_module_version": "1.2.0", 1343 | "model_name": "LayoutModel", 1344 | "state": { 1345 | "_model_module": "@jupyter-widgets/base", 1346 | "_model_module_version": "1.2.0", 1347 | "_model_name": "LayoutModel", 1348 | "_view_count": null, 1349 | "_view_module": "@jupyter-widgets/base", 1350 | "_view_module_version": "1.2.0", 1351 | "_view_name": "LayoutView", 1352 | "align_content": null, 1353 | "align_items": null, 1354 | "align_self": null, 1355 | "border": null, 1356 | "bottom": null, 1357 | "display": null, 1358 | "flex": null, 1359 | "flex_flow": null, 1360 | "grid_area": null, 1361 | "grid_auto_columns": null, 1362 | "grid_auto_flow": null, 1363 | "grid_auto_rows": null, 1364 | "grid_column": null, 1365 | "grid_gap": null, 1366 | "grid_row": null, 1367 | "grid_template_areas": null, 1368 | "grid_template_columns": null, 1369 | "grid_template_rows": null, 1370 | "height": null, 1371 | "justify_content": null, 1372 | "justify_items": null, 1373 | "left": 
null, 1374 | "margin": null, 1375 | "max_height": null, 1376 | "max_width": null, 1377 | "min_height": null, 1378 | "min_width": null, 1379 | "object_fit": null, 1380 | "object_position": null, 1381 | "order": null, 1382 | "overflow": null, 1383 | "overflow_x": null, 1384 | "overflow_y": null, 1385 | "padding": null, 1386 | "right": null, 1387 | "top": null, 1388 | "visibility": null, 1389 | "width": null 1390 | } 1391 | }, 1392 | "b7e8c1bf6e394e61aae415a735b18ffe": { 1393 | "model_module": "@jupyter-widgets/controls", 1394 | "model_module_version": "1.5.0", 1395 | "model_name": "DescriptionStyleModel", 1396 | "state": { 1397 | "_model_module": "@jupyter-widgets/controls", 1398 | "_model_module_version": "1.5.0", 1399 | "_model_name": "DescriptionStyleModel", 1400 | "_view_count": null, 1401 | "_view_module": "@jupyter-widgets/base", 1402 | "_view_module_version": "1.2.0", 1403 | "_view_name": "StyleView", 1404 | "description_width": "" 1405 | } 1406 | }, 1407 | "c8c9446c1a2f4a3785600ab596a451c9": { 1408 | "model_module": "@jupyter-widgets/base", 1409 | "model_module_version": "1.2.0", 1410 | "model_name": "LayoutModel", 1411 | "state": { 1412 | "_model_module": "@jupyter-widgets/base", 1413 | "_model_module_version": "1.2.0", 1414 | "_model_name": "LayoutModel", 1415 | "_view_count": null, 1416 | "_view_module": "@jupyter-widgets/base", 1417 | "_view_module_version": "1.2.0", 1418 | "_view_name": "LayoutView", 1419 | "align_content": null, 1420 | "align_items": null, 1421 | "align_self": null, 1422 | "border": null, 1423 | "bottom": null, 1424 | "display": null, 1425 | "flex": null, 1426 | "flex_flow": null, 1427 | "grid_area": null, 1428 | "grid_auto_columns": null, 1429 | "grid_auto_flow": null, 1430 | "grid_auto_rows": null, 1431 | "grid_column": null, 1432 | "grid_gap": null, 1433 | "grid_row": null, 1434 | "grid_template_areas": null, 1435 | "grid_template_columns": null, 1436 | "grid_template_rows": null, 1437 | "height": null, 1438 | "justify_content": null, 1439 | "justify_items": null, 1440 | "left": null, 1441 | "margin": null, 1442 | "max_height": null, 1443 | "max_width": null, 1444 | "min_height": null, 1445 | "min_width": null, 1446 | "object_fit": null, 1447 | "object_position": null, 1448 | "order": null, 1449 | "overflow": null, 1450 | "overflow_x": null, 1451 | "overflow_y": null, 1452 | "padding": null, 1453 | "right": null, 1454 | "top": null, 1455 | "visibility": null, 1456 | "width": null 1457 | } 1458 | }, 1459 | "d0c79d32309640a18b1097e50c273489": { 1460 | "model_module": "@jupyter-widgets/controls", 1461 | "model_module_version": "1.5.0", 1462 | "model_name": "HTMLModel", 1463 | "state": { 1464 | "_dom_classes": [], 1465 | "_model_module": "@jupyter-widgets/controls", 1466 | "_model_module_version": "1.5.0", 1467 | "_model_name": "HTMLModel", 1468 | "_view_count": null, 1469 | "_view_module": "@jupyter-widgets/controls", 1470 | "_view_module_version": "1.5.0", 1471 | "_view_name": "HTMLView", 1472 | "description": "", 1473 | "description_tooltip": null, 1474 | "layout": "IPY_MODEL_fd5460933db44c92b60e41b065551215", 1475 | "placeholder": "​", 1476 | "style": "IPY_MODEL_6f08665adc814ac09e7cb21e0533866c", 1477 | "value": "Extraction completed...: 100%" 1478 | } 1479 | }, 1480 | "d46fa732fb474fce8f940c031ca06637": { 1481 | "model_module": "@jupyter-widgets/controls", 1482 | "model_module_version": "1.5.0", 1483 | "model_name": "FloatProgressModel", 1484 | "state": { 1485 | "_dom_classes": [], 1486 | "_model_module": "@jupyter-widgets/controls", 1487 | 
"_model_module_version": "1.5.0", 1488 | "_model_name": "FloatProgressModel", 1489 | "_view_count": null, 1490 | "_view_module": "@jupyter-widgets/controls", 1491 | "_view_module_version": "1.5.0", 1492 | "_view_name": "ProgressView", 1493 | "bar_style": "success", 1494 | "description": "", 1495 | "description_tooltip": null, 1496 | "layout": "IPY_MODEL_9613736bf85c44338d6f5609e6efc90b", 1497 | "max": 1, 1498 | "min": 0, 1499 | "orientation": "horizontal", 1500 | "style": "IPY_MODEL_dbe66bad8ba94b30b93c7c9d25cf8fbd", 1501 | "value": 1 1502 | } 1503 | }, 1504 | "dbe66bad8ba94b30b93c7c9d25cf8fbd": { 1505 | "model_module": "@jupyter-widgets/controls", 1506 | "model_module_version": "1.5.0", 1507 | "model_name": "ProgressStyleModel", 1508 | "state": { 1509 | "_model_module": "@jupyter-widgets/controls", 1510 | "_model_module_version": "1.5.0", 1511 | "_model_name": "ProgressStyleModel", 1512 | "_view_count": null, 1513 | "_view_module": "@jupyter-widgets/base", 1514 | "_view_module_version": "1.2.0", 1515 | "_view_name": "StyleView", 1516 | "bar_color": null, 1517 | "description_width": "" 1518 | } 1519 | }, 1520 | "e3c232f427c649f2bf3f58ed97ad15d1": { 1521 | "model_module": "@jupyter-widgets/controls", 1522 | "model_module_version": "1.5.0", 1523 | "model_name": "ProgressStyleModel", 1524 | "state": { 1525 | "_model_module": "@jupyter-widgets/controls", 1526 | "_model_module_version": "1.5.0", 1527 | "_model_name": "ProgressStyleModel", 1528 | "_view_count": null, 1529 | "_view_module": "@jupyter-widgets/base", 1530 | "_view_module_version": "1.2.0", 1531 | "_view_name": "StyleView", 1532 | "bar_color": null, 1533 | "description_width": "" 1534 | } 1535 | }, 1536 | "ecefe53c759e4a54aa1f3ed44444fdf5": { 1537 | "model_module": "@jupyter-widgets/base", 1538 | "model_module_version": "1.2.0", 1539 | "model_name": "LayoutModel", 1540 | "state": { 1541 | "_model_module": "@jupyter-widgets/base", 1542 | "_model_module_version": "1.2.0", 1543 | "_model_name": "LayoutModel", 1544 | "_view_count": null, 1545 | "_view_module": "@jupyter-widgets/base", 1546 | "_view_module_version": "1.2.0", 1547 | "_view_name": "LayoutView", 1548 | "align_content": null, 1549 | "align_items": null, 1550 | "align_self": null, 1551 | "border": null, 1552 | "bottom": null, 1553 | "display": null, 1554 | "flex": null, 1555 | "flex_flow": null, 1556 | "grid_area": null, 1557 | "grid_auto_columns": null, 1558 | "grid_auto_flow": null, 1559 | "grid_auto_rows": null, 1560 | "grid_column": null, 1561 | "grid_gap": null, 1562 | "grid_row": null, 1563 | "grid_template_areas": null, 1564 | "grid_template_columns": null, 1565 | "grid_template_rows": null, 1566 | "height": null, 1567 | "justify_content": null, 1568 | "justify_items": null, 1569 | "left": null, 1570 | "margin": null, 1571 | "max_height": null, 1572 | "max_width": null, 1573 | "min_height": null, 1574 | "min_width": null, 1575 | "object_fit": null, 1576 | "object_position": null, 1577 | "order": null, 1578 | "overflow": null, 1579 | "overflow_x": null, 1580 | "overflow_y": null, 1581 | "padding": null, 1582 | "right": null, 1583 | "top": null, 1584 | "visibility": null, 1585 | "width": null 1586 | } 1587 | }, 1588 | "fb0e11738e734dc6a7428c4a82e81705": { 1589 | "model_module": "@jupyter-widgets/controls", 1590 | "model_module_version": "1.5.0", 1591 | "model_name": "HBoxModel", 1592 | "state": { 1593 | "_dom_classes": [], 1594 | "_model_module": "@jupyter-widgets/controls", 1595 | "_model_module_version": "1.5.0", 1596 | "_model_name": "HBoxModel", 1597 | "_view_count": 
null, 1598 | "_view_module": "@jupyter-widgets/controls", 1599 | "_view_module_version": "1.5.0", 1600 | "_view_name": "HBoxView", 1601 | "box_style": "", 1602 | "children": [ 1603 | "IPY_MODEL_a327863b15a54d1eb8cd2c26ee31c826", 1604 | "IPY_MODEL_d46fa732fb474fce8f940c031ca06637", 1605 | "IPY_MODEL_fcf94990ac4f46b2a0fd54e06eb0f88b" 1606 | ], 1607 | "layout": "IPY_MODEL_4f7c45939d3f49eb856ccc58f999e37b" 1608 | } 1609 | }, 1610 | "fcf94990ac4f46b2a0fd54e06eb0f88b": { 1611 | "model_module": "@jupyter-widgets/controls", 1612 | "model_module_version": "1.5.0", 1613 | "model_name": "HTMLModel", 1614 | "state": { 1615 | "_dom_classes": [], 1616 | "_model_module": "@jupyter-widgets/controls", 1617 | "_model_module_version": "1.5.0", 1618 | "_model_name": "HTMLModel", 1619 | "_view_count": null, 1620 | "_view_module": "@jupyter-widgets/controls", 1621 | "_view_module_version": "1.5.0", 1622 | "_view_name": "HTMLView", 1623 | "description": "", 1624 | "description_tooltip": null, 1625 | "layout": "IPY_MODEL_ecefe53c759e4a54aa1f3ed44444fdf5", 1626 | "placeholder": "​", 1627 | "style": "IPY_MODEL_385a10a4ca644ca38db0f539be5a54d5", 1628 | "value": " 1/1 [00:15<00:00, 12.66s/ url]" 1629 | } 1630 | }, 1631 | "fd5460933db44c92b60e41b065551215": { 1632 | "model_module": "@jupyter-widgets/base", 1633 | "model_module_version": "1.2.0", 1634 | "model_name": "LayoutModel", 1635 | "state": { 1636 | "_model_module": "@jupyter-widgets/base", 1637 | "_model_module_version": "1.2.0", 1638 | "_model_name": "LayoutModel", 1639 | "_view_count": null, 1640 | "_view_module": "@jupyter-widgets/base", 1641 | "_view_module_version": "1.2.0", 1642 | "_view_name": "LayoutView", 1643 | "align_content": null, 1644 | "align_items": null, 1645 | "align_self": null, 1646 | "border": null, 1647 | "bottom": null, 1648 | "display": null, 1649 | "flex": null, 1650 | "flex_flow": null, 1651 | "grid_area": null, 1652 | "grid_auto_columns": null, 1653 | "grid_auto_flow": null, 1654 | "grid_auto_rows": null, 1655 | "grid_column": null, 1656 | "grid_gap": null, 1657 | "grid_row": null, 1658 | "grid_template_areas": null, 1659 | "grid_template_columns": null, 1660 | "grid_template_rows": null, 1661 | "height": null, 1662 | "justify_content": null, 1663 | "justify_items": null, 1664 | "left": null, 1665 | "margin": null, 1666 | "max_height": null, 1667 | "max_width": null, 1668 | "min_height": null, 1669 | "min_width": null, 1670 | "object_fit": null, 1671 | "object_position": null, 1672 | "order": null, 1673 | "overflow": null, 1674 | "overflow_x": null, 1675 | "overflow_y": null, 1676 | "padding": null, 1677 | "right": null, 1678 | "top": null, 1679 | "visibility": null, 1680 | "width": null 1681 | } 1682 | }, 1683 | "fd5c76c7cda94b22814409cbda62a38d": { 1684 | "model_module": "@jupyter-widgets/base", 1685 | "model_module_version": "1.2.0", 1686 | "model_name": "LayoutModel", 1687 | "state": { 1688 | "_model_module": "@jupyter-widgets/base", 1689 | "_model_module_version": "1.2.0", 1690 | "_model_name": "LayoutModel", 1691 | "_view_count": null, 1692 | "_view_module": "@jupyter-widgets/base", 1693 | "_view_module_version": "1.2.0", 1694 | "_view_name": "LayoutView", 1695 | "align_content": null, 1696 | "align_items": null, 1697 | "align_self": null, 1698 | "border": null, 1699 | "bottom": null, 1700 | "display": null, 1701 | "flex": null, 1702 | "flex_flow": null, 1703 | "grid_area": null, 1704 | "grid_auto_columns": null, 1705 | "grid_auto_flow": null, 1706 | "grid_auto_rows": null, 1707 | "grid_column": null, 1708 | "grid_gap": null, 
1709 | "grid_row": null, 1710 | "grid_template_areas": null, 1711 | "grid_template_columns": null, 1712 | "grid_template_rows": null, 1713 | "height": null, 1714 | "justify_content": null, 1715 | "justify_items": null, 1716 | "left": null, 1717 | "margin": null, 1718 | "max_height": null, 1719 | "max_width": null, 1720 | "min_height": null, 1721 | "min_width": null, 1722 | "object_fit": null, 1723 | "object_position": null, 1724 | "order": null, 1725 | "overflow": null, 1726 | "overflow_x": null, 1727 | "overflow_y": null, 1728 | "padding": null, 1729 | "right": null, 1730 | "top": null, 1731 | "visibility": null, 1732 | "width": null 1733 | } 1734 | } 1735 | } 1736 | } 1737 | }, 1738 | "nbformat": 4, 1739 | "nbformat_minor": 1 1740 | } 1741 | --------------------------------------------------------------------------------