├── figures
├── overview.jpeg
├── How datasets are managed.png
└── batch evaluation pipeline.png
├── notebooks
├── test-images
│ ├── bird_0000.jpg
│ ├── cat_0000.jpg
│ ├── deer_0000.jpg
│ ├── dog_0000.jpg
│ ├── frog_0000.jpg
│ ├── ship_0000.jpg
│ ├── horse_0000.jpg
│ ├── truck_0000.jpg
│ ├── airplane_0000.jpg
│ └── automobile_0000.jpg
├── README.md
├── 98_Batch_Prediction_Test.ipynb
├── 04_Cloud_Scheduler_Trigger.ipynb
├── 02_TFX_Training_Pipeline.ipynb
└── 01_Dataset_Prep.ipynb
├── .github
└── workflows
│ └── lint.yml
├── custom_components
├── file_list_gen.py
├── training_pipeline_trigger.py
├── batch_pred_evaluator.py
├── batch_prediction_vertex.py
└── span_preparator.py
├── README.md
└── LICENSE
/figures/overview.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/figures/overview.jpeg
--------------------------------------------------------------------------------
/notebooks/test-images/bird_0000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/bird_0000.jpg
--------------------------------------------------------------------------------
/notebooks/test-images/cat_0000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/cat_0000.jpg
--------------------------------------------------------------------------------
/notebooks/test-images/deer_0000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/deer_0000.jpg
--------------------------------------------------------------------------------
/notebooks/test-images/dog_0000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/dog_0000.jpg
--------------------------------------------------------------------------------
/notebooks/test-images/frog_0000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/frog_0000.jpg
--------------------------------------------------------------------------------
/notebooks/test-images/ship_0000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/ship_0000.jpg
--------------------------------------------------------------------------------
/figures/How datasets are managed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/figures/How datasets are managed.png
--------------------------------------------------------------------------------
/figures/batch evaluation pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/figures/batch evaluation pipeline.png
--------------------------------------------------------------------------------
/notebooks/test-images/horse_0000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/horse_0000.jpg
--------------------------------------------------------------------------------
/notebooks/test-images/truck_0000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/truck_0000.jpg
--------------------------------------------------------------------------------
/notebooks/test-images/airplane_0000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/airplane_0000.jpg
--------------------------------------------------------------------------------
/notebooks/test-images/automobile_0000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/HEAD/notebooks/test-images/automobile_0000.jpg
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: black-action
2 | on: [push, pull_request]
3 | jobs:
4 | linter_name:
5 | name: runner / black
6 | runs-on: ubuntu-latest
7 | steps:
8 | - uses: actions/checkout@v2
9 | - name: Check files using the black formatter
10 | uses: rickstaa/action-black@v1
11 | id: action_black
12 | with:
13 | black_args: ". --check --diff"
14 | - name: Annotate diff changes using reviewdog
15 | if: steps.action_black.outputs.is_formatted == 'true'
16 | uses: reviewdog/action-suggester@v1
17 | with:
18 | tool_name: blackfmt
19 |
--------------------------------------------------------------------------------
/custom_components/file_list_gen.py:
--------------------------------------------------------------------------------
1 | """
2 | Generate a text file in a format required by Vertex AI's Batch Prediction.
3 | There are a few supported input formats; this component generates the "file list" format.
4 | (https://cloud.google.com/vertex-ai/docs/predictions/batch-predictions)
5 | """
6 |
7 | import tensorflow as tf
8 | from absl import logging
9 |
10 | from tfx.dsl.component.experimental.decorators import component
11 | from tfx.dsl.component.experimental.annotations import Parameter, OutputArtifact
12 | from tfx.types.standard_artifacts import String
13 |
14 |
@component
def FileListGen(
    outpath: OutputArtifact[String],
    gcs_src_bucket: Parameter[str],
    gcs_src_prefix: Parameter[str] = "",
    output_filename: Parameter[str] = "test-images.txt",
):
    """
    Build a "file list" text file for Vertex AI Batch Prediction and upload it to GCS.

    Every ``*.jpg`` object directly under ``gs://{gcs_src_bucket}/{gcs_src_prefix}``
    is written, one path per line, to a local file named ``output_filename``. That
    file is then copied next to the images on GCS, and the uploaded location
    (without the ``gs://`` scheme) is stored as the value of ``outpath``.

    : param outpath: OutputArtifact to hold where output_filename will be located
        This will be used in the downstream component, BatchPredictionGen
    : param gcs_src_bucket: GCS bucket name where the list of raw data is
    : param gcs_src_prefix: prefix to be added to gcs_src_bucket; leading and
        trailing slashes are ignored, so "images" and "images/" are equivalent
    : param output_filename: output filename whose content is a list of file paths of raw data
    """
    logging.info("FileListGen started")

    # 1. get the list of data
    # Normalise the prefix first: appending "/" to a prefix that already ends
    # with one would yield "//" in the glob pattern and the upload destination.
    prefix = gcs_src_prefix.strip("/")
    if prefix:
        prefix = f"{prefix}/"
    img_paths = tf.io.gfile.glob(f"gs://{gcs_src_bucket}/{prefix}*.jpg")
    logging.info("Successfully retrieve the file(jpg) list from GCS path")
    if not img_paths:
        # An empty list is still written and uploaded; just make it visible.
        logging.warning(
            f"No jpg files found under gs://{gcs_src_bucket}/{prefix}; the file list will be empty"
        )

    # 2. write the list of data in the expected format in Vertex AI Batch Prediction to a local file
    with open(output_filename, "w", encoding="utf-8") as f:
        f.writelines("%s\n" % img_path for img_path in img_paths)
    logging.info(
        f"Successfully created the file list file({output_filename}) in local storage"
    )

    # 3. upload the local file to GCS location
    gcs_dst = f"{gcs_src_bucket}/{prefix}{output_filename}"
    tf.io.gfile.copy(output_filename, f"gs://{gcs_dst}", overwrite=True)
    logging.info(f"Successfully uploaded the file list ({gcs_dst})")

    # 4. store the GCS location where the local file is
    # (kept without the "gs://" scheme, exactly as before)
    outpath.value = gcs_dst
52 |
--------------------------------------------------------------------------------
/notebooks/README.md:
--------------------------------------------------------------------------------
1 | Notebook | Description | Colab Link
2 | --- | --- | --- |
3 | 01_Dataset_Prep.ipynb | Download CIFAR10 TFRecord from TFDS (TensorFlow Datasets) and upload it to GCS Bucket. It makes sure the directory structure follows `/span-{SPAN}/[train\|val]/*.tfrecord` format. It is primarily needed for the initial model training as shown in the `02_TFX_Training_Pipeline.ipynb` notebook. | [](http://colab.research.google.com/github/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/01_Dataset_Prep.ipynb)
4 | 02_TFX_Training_Pipeline.ipynb | Build TFX pipeline that can be run on Vertex AI Pipeline. `ExampleGen`, `Trainer`, and `Pusher` components are included. The trained model is also deployed on Vertex AI and can be consumed by developers via API calls. | [](http://colab.research.google.com/github/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/02_TFX_Training_Pipeline.ipynb)
5 | 03_Batch_Prediction_Pipeline.ipynb | Build KFP pipeline that runs batch prediction on the trained model obtained from `02_TFX_Training_Pipeline.ipynb`. Then the `TFX Training Pipeline` gets triggered based on the evaluation of the predicted results. | [](http://colab.research.google.com/github/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/03_Batch_Prediction_Pipeline.ipynb)
6 | 04_Cloud_Scheduler_Trigger.ipynb | Create/Publish Pub/Sub topic, and deploy Cloud Function listening to the Pub/Sub topic to trigger the batch prediction pipeline. | [](http://colab.research.google.com/github/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/04_Cloud_Scheduler_Trigger.ipynb)
7 | 97_Prepare_Test_Images.ipynb | Download test images from [Bing](https://www.bing.com/) to simulate data drift. Downloaded images will be moved into a GCS bucket. | [](http://colab.research.google.com/github/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/97_Prepare_Test_Images.ipynb)
8 | 98_Batch_Prediction_Test.ipynb | Makes a batch prediction on a deployed model via Vertex AI Prediction. It measures the model performance (accuracy) on new data. | [](http://colab.research.google.com/github/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/98_Batch_Prediction_Test.ipynb)
9 |
--------------------------------------------------------------------------------
/custom_components/training_pipeline_trigger.py:
--------------------------------------------------------------------------------
1 | """
2 | Component responsible for triggering a training job given a pipeline specification.
3 | """
4 |
5 | import json
6 |
7 | from google.cloud import storage
8 |
9 | from kfp.v2.google.client import AIPlatformClient
10 | from tfx.dsl.component.experimental.annotations import Parameter, InputArtifact
11 | from tfx.dsl.component.experimental.decorators import component
12 | from tfx.types.experimental.simple_artifacts import Dataset
13 |
14 | from absl import logging
15 |
16 |
@component
def PipelineTrigger(
    is_retrain: InputArtifact[Dataset],
    latest_span_id: InputArtifact[Dataset],
    pipeline_spec_path: Parameter[str],
    project_id: Parameter[str],
    region: Parameter[str],
):
    """
    Submit a training pipeline run on Vertex AI when retraining is requested.

    :param is_retrain: Artifact whose "result" custom property is inspected; a run
        is submitted only when that property equals the string "False".
    :param latest_span_id: Artifact whose "latest_span" custom property holds the
        latest span id used to craft training data for the model.
    :param pipeline_spec_path: Training pipeline specification path (a GCS path).
    :param project_id: GCP project id.
    :param region: GCP region.
    :raises ValueError: if no object exists at ``pipeline_spec_path``.
    """
    # Anything other than the string "False" means there is nothing to trigger.
    if is_retrain.get_string_custom_property("result") != "False":
        return

    # Make sure the pipeline spec is really there before asking Vertex AI to run it.
    gcs_client = storage.Client()
    bucket_name, _, blob_name = pipeline_spec_path.replace("gs://", "").partition("/")
    spec_blob = storage.Blob(bucket=gcs_client.bucket(bucket_name), name=blob_name)
    if not spec_blob.exists(gcs_client):
        raise ValueError(f"{pipeline_spec_path} does not exist.")

    # Vertex AI client used to submit the pipeline execution.
    vertex_client = AIPlatformClient(project_id=project_id, region=region)

    # The new span plus the one right before it are fed to the training pipeline.
    latest_span = latest_span_id.get_string_custom_property("latest_span")
    # NOTE(review): "[...]" is a glob character class, so this looks correct only
    # while both span ids are single digits — confirm before spans reach 10.
    span_glob = f"span-[{int(latest_span)-1}{latest_span}]"
    input_config = {
        "splits": [
            {"name": "train", "pattern": f"{span_glob}/train/*.tfrecord"},
            {"name": "val", "pattern": f"{span_glob}/test/*.tfrecord"},
        ]
    }

    # Kick off the training job described by the pipeline spec.
    run_parameters = {
        "input-config": json.dumps(input_config),
        "output-config": json.dumps({}),
    }
    response = vertex_client.create_run_from_job_spec(
        pipeline_spec_path,
        enable_caching=False,
        parameter_values=run_parameters,
    )
    logging.info(response)
75 |
--------------------------------------------------------------------------------
/custom_components/batch_pred_evaluator.py:
--------------------------------------------------------------------------------
1 | """
2 | This component evaluates the performance of a currently deployed model, and
3 | the evaluation is based on the result of batch prediction on Vertex AI from the previous component.
4 | At the end, this component records `True` when the measured accuracy meets the threshold and `False` otherwise; the downstream components retrain only when the result is `False`.
5 | Reference: https://bit.ly/vertex-batch
6 | """
7 |
8 | from tfx.dsl.component.experimental.annotations import Parameter, OutputArtifact
9 | from tfx.dsl.component.experimental.decorators import component
10 | from tfx.types.experimental.simple_artifacts import Dataset
11 |
12 | from absl import logging
13 | import os
14 | import json
15 |
16 |
@component
def PerformanceEvaluator(
    gcs_destination: Parameter[str],
    local_directory: Parameter[str],
    threshold: Parameter[float],
    trigger_pipeline: OutputArtifact[Dataset],
):
    """
    Measure the accuracy of the latest batch prediction and record whether it
    meets ``threshold``.

    gcs_destination: GCS location where the files containing
                     the result of batch prediction is; results are copied
                     from ``{gcs_destination}/{local_directory}``
    local_directory: Temporary directory to hold files copied
                     from the gcs_destination
    threshold: threshold to decide if retraining is needed or not
               it is based on the measured accuracy
    trigger_pipeline: an output artifact whose "result" custom property is set
                      to ``str(accuracy >= threshold)``, i.e. "True" when the
                      accuracy meets the threshold and "False" when it does not

    Raises ``subprocess.CalledProcessError`` when the download fails and
    ``ValueError`` when nothing was downloaded or no prediction results exist.
    """
    # Local import: only this function needs it.
    import subprocess

    full_gcs_results_dir = f"{gcs_destination}/{local_directory}"

    # Create missing directories.
    os.makedirs(local_directory, exist_ok=True)

    # Copy the results from Cloud Storage. check=True stops here when the
    # download fails, instead of going on to evaluate whatever (possibly
    # stale) files are already present in local_directory.
    subprocess.run(
        ["gsutil", "-m", "cp", "-r", full_gcs_results_dir, local_directory],
        check=True,
    )

    # Get most recently modified directory.
    candidates = [os.path.join(local_directory, d) for d in os.listdir(local_directory)]
    if not candidates:
        raise ValueError(
            f"Nothing was downloaded from {full_gcs_results_dir} into {local_directory}."
        )
    latest_directory = max(candidates, key=os.path.getmtime)

    # Get downloaded results in directory.
    results_files = [
        os.path.join(dirpath, filename)
        for dirpath, _subdirs, filenames in os.walk(latest_directory)
        for filename in filenames
        if filename.startswith("prediction.results")
    ]

    # Consolidate all the results into a list (one JSON document per line).
    results = []
    for results_file in results_files:
        with open(results_file, "r", encoding="utf-8") as file:
            results.extend(json.loads(line) for line in file if line.strip())

    # Without any result the accuracy is undefined; fail with a clear message
    # rather than a ZeroDivisionError.
    if not results:
        raise ValueError(
            f"No prediction results found under {latest_directory}; cannot compute accuracy."
        )

    # Calculate performance. The expected label is the part of the instance's
    # file name before the first underscore (e.g. "bird" in "bird_0000.jpg").
    num_correct = sum(
        1
        for result in results
        if os.path.basename(result["instance"]).split("_")[0]
        == result["prediction"]["label"]
    )

    accuracy = num_correct / len(results)
    logging.info(f"Accuracy: {accuracy*100}%")

    # Store the boolean result.
    trigger_pipeline.set_string_custom_property("result", str(accuracy >= threshold))
78 |
--------------------------------------------------------------------------------
/custom_components/batch_prediction_vertex.py:
--------------------------------------------------------------------------------
1 | """
2 | This component launches a Batch Prediction job on Vertex AI.
3 | To learn more about Vertex AI Batch Prediction jobs, see:
4 | https://cloud.google.com/vertex-ai/docs/predictions/batch-predictions.
5 | """
6 |
7 | from google.cloud import storage
8 |
9 | from tfx.dsl.component.experimental.annotations import Parameter, InputArtifact
10 | from tfx.dsl.component.experimental.decorators import component
11 | from tfx.types.standard_artifacts import String
12 | import google.cloud.aiplatform as vertex_ai
13 |
14 | from absl import logging
15 |
16 |
@component
def BatchPredictionGen(
    gcs_source: InputArtifact[String],
    project: Parameter[str],
    location: Parameter[str],
    model_resource_name: Parameter[str],
    job_display_name: Parameter[str],
    gcs_destination: Parameter[str],
    instances_format: Parameter[str] = "file-list",
    machine_type: Parameter[str] = "n1-standard-2",
    accelerator_count: Parameter[int] = 0,
    accelerator_type: Parameter[str] = None,
    starting_replica_count: Parameter[int] = 1,
    max_replica_count: Parameter[int] = 1,
):
    """
    Launch a synchronous Vertex AI Batch Prediction job.

    gcs_source: Artifact whose URI points at a GCS object; the text stored in that
        object (written by FileListGen) is the location of the file list, without
        the ``gs://`` scheme, to be used by the Batch Prediction job as its input.
    model_resource_name: used as the ``display_name`` filter when listing models;
        the last model in ``update_time`` order is used.
    Rest of the parameters are explained here: https://git.io/JiUyU.

    Raises ``ValueError`` when no model matches ``model_resource_name``.
    """
    storage_client = storage.Client()

    # Read GCS Source (gcs_source contains the full path of GCS object).
    # 1-1. get bucketname from gcs_source
    # Split only on the first "//" so that an object path which itself contains
    # "//" is not truncated.
    bucketname, _, objectpath = gcs_source.uri.split("//", 1)[1].partition("/")
    bucket = storage_client.get_bucket(bucketname)
    logging.info(f"bucketname: {bucketname}")

    # 1-2. objectpath is the object path without the bucket name.
    # 1-3. read the object to get value set by OutputArtifact from FileListGen.
    blob = bucket.blob(objectpath)
    logging.info(f"objectpath: {objectpath}")

    # Strip stray whitespace/newlines so the resulting URI is well-formed.
    file_list_uri = f"gs://{blob.download_as_text().strip()}"

    # Get Model.
    vertex_ai.init(project=project, location=location)
    models = vertex_ai.Model.list(
        filter=f"display_name={model_resource_name}", order_by="update_time"
    )
    if not models:
        raise ValueError(
            f"No Vertex AI model with display_name={model_resource_name} "
            f"found in project {project} ({location})."
        )
    model = models[-1]

    # Launch a Batch Prediction job.
    logging.info("Starting batch prediction job.")
    logging.info(f"GCS path where file list is: {file_list_uri}")
    batch_prediction_job = model.batch_predict(
        job_display_name=job_display_name,
        instances_format=instances_format,
        gcs_source=file_list_uri,
        gcs_destination_prefix=gcs_destination,
        machine_type=machine_type,
        accelerator_count=accelerator_count,
        accelerator_type=accelerator_type,
        starting_replica_count=starting_replica_count,
        max_replica_count=max_replica_count,
        sync=True,
    )

    logging.info(batch_prediction_job.display_name)
    logging.info(batch_prediction_job.resource_name)
    logging.info(batch_prediction_job.state)
79 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Continuous Adaptation for Machine Learning System to Data Changes ([#TFCommunitySpotlight Awarded](https://twitter.com/TensorFlow/status/1469019016782041095?s=20))
2 |
3 | _By [Chansung Park](https://github.com/deep-diver) and [Sayak Paul](https://github.com/sayakpaul)_
4 |
5 | 
6 |
7 | MLOps system evolves according to the changes of the world, and that is usually caused by [data/concept drift](https://en.wikipedia.org/wiki/Concept_drift). This project shows how to combine two separate pipelines, one for batch prediction and the other for training to adapt to data changes. We worked with the TFX team to author a blog post detailing our approach. The blog post is available here: https://blog.tensorflow.org/2021/12/continuous-adaptation-for-machine.html.
8 |
9 | We assume familiarity with basic MLOps concepts (like pipelines, data drift, batch predictions, etc.), TensorFlow, TensorFlow Extended, and Vertex AI from the reader.
10 |
11 | An MLOps system can also evolve when a much better algorithm (e.g., a state-of-the-art model) comes out. In that case, the system should apply the better algorithm to understand the existing data better. We have demonstrated such workflows in the following projects:
12 |
13 | * Model Training as a CI/CD System Part1: Reflect changes in codebase to MLOps pipeline: [Code on GitHub](https://github.com/deep-diver/Model-Training-as-a-CI-CD-System), [Article on the GCP blog](https://cloud.google.com/blog/topics/developers-practitioners/model-training-cicd-system-part-i)
14 | * Model Training as a CI/CD System Part2: Trigger, schedule, and run MLOps pipelines: [Code on GitHub](https://github.com/sayakpaul/CI-CD-for-Model-Training), [Article on the GCP blog](https://cloud.google.com/blog/topics/developers-practitioners/model-training-cicd-system-part-ii)
15 |
16 | ## Workflow
17 |
18 | 1. Run the initial training pipeline to train an image classifier and deploy it using TensorFlow, TFX, and Vertex AI ([`02_TFX_Training_Pipeline.ipynb`](https://github.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/02_TFX_Training_Pipeline.ipynb)).
19 | 2. Download and prepare images from Bing search to simulate the data drift ([`97_Prepare_Test_Images.ipynb`](https://github.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/97_Prepare_Test_Images.ipynb)).
20 | 3. Generate batch prediction pipeline specification (JSON) ([`03_Batch_Prediction_Pipeline.ipynb`](https://github.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/03_Batch_Prediction_Pipeline.ipynb)).
21 | 4. Deploy cloud function to watch if there are enough sample data to perform batch prediction pipeline and to trigger the batch prediction pipeline ([`04_Cloud_Scheduler_Trigger.ipynb`](https://github.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/04_Cloud_Scheduler_Trigger.ipynb)).
22 | 5. Schedule a periodic job to run the deployed cloud function ([`04_Cloud_Scheduler_Trigger.ipynb`](https://github.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/blob/main/notebooks/04_Cloud_Scheduler_Trigger.ipynb)).
23 |
24 | ## Custom components
25 |
26 | We developed several custom components in TFX for this project. You can find them under the [`custom_components`](https://github.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/tree/main/custom_components) directory.
27 |
28 | ## Checklist
29 |
30 | - [X] Initial Data Preparation (CIFAR10)
31 | - [X] Build Training Pipeline
32 | - [X] Build Batch Prediction Pipeline
33 | - [X] FileListGen component
34 | - [X] BatchPredictionGen component
35 | - [X] PerformanceEvaluator component
36 | - [X] SpanPreparator component
37 | - [X] PipelineTrigger component
38 | - [X] Data Preparation for Data/Concept Drift Simulation (from Bing)
39 | - [X] Deploy Cloud Function, Schedule a Job to Trigger the Cloud Function
40 | - [X] End to End Test
41 |
42 | ## Feedback
43 |
44 | We welcome feedback. Please create an issue to let us know what you think.
45 |
46 | ## Acknowledgements
47 |
48 | * [ML-GDE program](https://developers.google.com/programs/experts/) for providing GCP credits.
49 | * Robert Crowe and Jiayi Zhao of Google for helping us with our technical doubts.
50 |
--------------------------------------------------------------------------------
/custom_components/span_preparator.py:
--------------------------------------------------------------------------------
1 | """
2 | This component is responsible for separating provided samples into training and
3 | validation splits. It then converts them to TFRecords and stores those inside
4 | a GCS location. Finally, it returns the latest span id calculated from the current
5 | samples in `gcs_source_bucket`.
6 | """
7 |
8 | from tfx.dsl.component.experimental.decorators import component
9 | from tfx.dsl.component.experimental.annotations import Parameter
10 | from tfx.dsl.component.experimental.annotations import OutputArtifact, InputArtifact
11 | from tfx.types.experimental.simple_artifacts import Dataset
12 | from absl import logging
13 |
14 | from datetime import datetime
15 | import tensorflow as tf
16 | import random
17 | import os
18 |
19 |
# Label-mapping: class name -> integer id, numbered in the order listed here.
LABEL_DICT = {
    class_name: class_id
    for class_id, class_name in enumerate(
        (
            "airplane",
            "automobile",
            "bird",
            "cat",
            "deer",
            "dog",
            "frog",
            "horse",
            "ship",
            "truck",
        )
    )
}
33 |
34 |
# Images are byte-strings: wrap a list of them in a BytesList feature.
def _bytestring_feature(list_of_bytestrings):
    bytes_list = tf.train.BytesList(value=list_of_bytestrings)
    return tf.train.Feature(bytes_list=bytes_list)
38 |
39 |
# Classes would be integers: wrap a list of them in an Int64List feature.
def _int_feature(list_of_ints):
    int64_list = tf.train.Int64List(value=list_of_ints)
    return tf.train.Feature(int64_list=int64_list)
43 |
44 |
# Builds one record for the tfrecord file: a tf.train.Example holding the
# image bytes under "image" and its integer class under "label".
def to_tfrecord(img_bytes, label):
    features = tf.train.Features(
        feature={
            "image": _bytestring_feature([img_bytes]),
            "label": _int_feature([label]),
        }
    )
    return tf.train.Example(features=features)
53 |
54 |
def write_tfrecords(filepaths, dest_gcs, tfrecord_filename, new_span, is_train):
    """
    Serialize images into a single local TFRecord file and upload it to
    ``{dest_gcs}/span-{new_span}/train/`` or ``.../test/``.

    :param filepaths: image paths to read; the class name is taken from the part
        of each file name before the first underscore and looked up in LABEL_DICT.
    :param dest_gcs: destination location the span folders live under.
    :param tfrecord_filename: name of the temporary local TFRecord file.
    :param new_span: span id used in the destination folder name.
    :param is_train: True to upload under "train", False to upload under "test".
    :raises KeyError: if a file name's class prefix is not in LABEL_DICT.
    :raises subprocess.CalledProcessError: if the ``gsutil cp`` upload fails.
    """
    # Local import: only this function needs it.
    import subprocess

    # For this project, we are serializing the images in one TFRecord only.
    # For more realistic purposes, this should be sharded.
    folder = "train" if is_train else "test"
    destination = f"{dest_gcs}/span-{new_span}/{folder}/"

    try:
        with tf.io.TFRecordWriter(tfrecord_filename) as writer:
            for path in filepaths:
                image_string = tf.io.read_file(path).numpy()
                class_name = path.split("/")[-1].split("_")[0]
                label = LABEL_DICT[class_name]

                example = to_tfrecord(image_string, label)
                writer.write(example.SerializeToString())

        # Copy the TFRecord file over to the GCS bucket. check=True surfaces a
        # failed upload instead of silently discarding the data.
        logging.info(f"gsutil cp {tfrecord_filename} {destination}")
        subprocess.run(["gsutil", "cp", tfrecord_filename, destination], check=True)
    finally:
        # Always remove the temporary local file, whether or not the upload worked.
        if os.path.exists(tfrecord_filename):
            os.remove(tfrecord_filename)
74 |
75 |
@component
def SpanPreparator(
    is_retrain: InputArtifact[Dataset],
    gcs_source_bucket: Parameter[str],
    gcs_destination_bucket: Parameter[str],
    latest_span_id: OutputArtifact[Dataset],
    gcs_source_prefix: Parameter[str] = "",
):
    """
    Turn the current sample images into a new data span of TFRecords.

    Nothing happens unless the "result" custom property of ``is_retrain`` equals
    the string "False". In that case the images are shuffled, split 80/20 into
    training and validation sets, written as TFRecords under
    ``{gcs_destination_bucket}/span-{N}/train|test/`` where N is one more than the
    highest existing span id, and the source location is renamed with an
    ``_old`` suffix.

    :param is_retrain: Artifact whose "result" custom property gates this component.
    :param gcs_source_bucket: GCS bucket name where the entry samples are residing.
    :param gcs_destination_bucket: GCS location where the converted TFRecords will be serialized.
    :param latest_span_id: Output artifact; its "latest_span" custom property is
        set to the newly created span id.
    :param gcs_source_prefix: Location prefix inside ``gcs_source_bucket``.
    :raises ValueError: if no ``span-<number>`` entry exists under
        ``gcs_destination_bucket``.
    """
    # Local import: only this function needs it.
    import re

    if is_retrain.get_string_custom_property("result") == "False":
        # Get the latest span and determine the new span. Compare span ids
        # numerically: picking the last glob entry would order "span-10"
        # before "span-9".
        span_ids = []
        for span_path in tf.io.gfile.glob(f"{gcs_destination_bucket}/span-*"):
            span_match = re.search(r"span-(\d+)/?$", span_path)
            if span_match:
                span_ids.append(int(span_match.group(1)))
        if not span_ids:
            raise ValueError(
                f"No existing span-<number> found under {gcs_destination_bucket}."
            )
        new_span = max(span_ids) + 1

        timestamp = datetime.utcnow().strftime("%y%m%d-%H%M%S")

        # Get images from the provided GCS source. The prefix is honoured here
        # so the images converted are the ones under the location that gets
        # renamed below.
        source_prefix = gcs_source_prefix.strip("/")
        if source_prefix:
            source_prefix = f"{source_prefix}/"
        image_paths = tf.io.gfile.glob(
            f"gs://{gcs_source_bucket}/{source_prefix}*.jpg"
        )
        logging.info(image_paths)
        random.shuffle(image_paths)

        # Create train and validation splits.
        val_split = 0.2
        split_index = int(len(image_paths) * (1 - val_split))
        training_paths = image_paths[:split_index]
        validation_paths = image_paths[split_index:]

        # Write as TFRecords.
        write_tfrecords(
            training_paths,
            gcs_destination_bucket,
            tfrecord_filename=f"new_training_data_{timestamp}.tfrecord",
            new_span=new_span,
            is_train=True,
        )
        write_tfrecords(
            validation_paths,
            gcs_destination_bucket,
            tfrecord_filename=f"new_validation_data_{timestamp}.tfrecord",
            new_span=new_span,
            is_train=False,
        )

        logging.info("Removing images from batch prediction bucket.")
        # NOTE(review): with an empty gcs_source_prefix this renames
        # "gs://<bucket>/" to "gs://<bucket>/_old" — confirm that is intended.
        os.system(
            f"gsutil mv gs://{gcs_source_bucket}/{gcs_source_prefix} gs://{gcs_source_bucket}/{gcs_source_prefix}_old"
        )
        latest_span_id.set_string_custom_property("latest_span", str(new_span))
131 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/notebooks/98_Batch_Prediction_Test.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "view-in-github"
8 | },
9 | "source": [
10 | "
"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {
16 | "id": "msGXve8btxnH"
17 | },
18 | "source": [
19 | "## Outline\n",
20 | "1. Upload the data to the designated GCS bucket\n",
21 | " - The data is stored in a GCS bucket to simulate a real-world scenario. In practice, data is collected in a central location (i.e. a GCS bucket) and is then used to measure the model performance. Model performance can be measured much more reliably on a batch of data than on a single (online) example.\n",
22 | "2. Perform batch prediction\n",
23 | "3. Measure the model performance (accuracy) on the data"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {},
29 | "source": [
30 | "## Setup"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 21,
36 | "metadata": {
37 | "id": "heCy5KIqGmN5"
38 | },
39 | "outputs": [],
40 | "source": [
41 | "!pip install -q --upgrade google-cloud-aiplatform\n",
42 | "!pip install -q --upgrade google-cloud-storage"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": null,
48 | "metadata": {
49 | "colab": {
50 | "base_uri": "https://localhost:8080/"
51 | },
52 | "id": "P28WFWH2GpwG",
53 | "outputId": "722815da-315d-4e1e-dc4b-b121ec397f8a"
54 | },
55 | "outputs": [],
56 | "source": [
57 | "!gcloud init"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 44,
63 | "metadata": {
64 | "id": "6v_P4FAjGuGf"
65 | },
66 | "outputs": [],
67 | "source": [
68 | "from google.colab import auth\n",
69 | "\n",
70 | "auth.authenticate_user()"
71 | ]
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "## Set Environment Values for GCP"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": 57,
83 | "metadata": {
84 | "id": "2Y4ZMVcLHHkX"
85 | },
86 | "outputs": [],
87 | "source": [
88 | "GOOGLE_CLOUD_PROJECT = \"central-hangar-321813\" # @param {type:\"string\"}\n",
89 | "GOOGLE_CLOUD_REGION = \"us-central1\" # @param {type:\"string\"}\n",
90 | "\n",
91 | "MODEL_NAME = \"resnet_cifar_latest\" # @param {type:\"string\"}\n",
92 | "\n",
93 | "TEST_FILENAME = \"test-images.txt\" # @param {type:\"string\"}\n",
94 | "TEST_GCS_BUCKET = \"gs://batch-prediction-collection\" # @param {type:\"string\"}\n",
95 | "TEST_LOCAL_PATH = \"Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images\" # @param {type:\"string\"}"
96 | ]
97 | },
98 | {
99 | "cell_type": "markdown",
100 | "metadata": {},
101 | "source": [
102 | "## Clone the Repository to Obtain Test Images\n",
103 | "- There are only 10 image files, intended for simple testing purposes"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": 12,
109 | "metadata": {
110 | "colab": {
111 | "base_uri": "https://localhost:8080/"
112 | },
113 | "id": "KHcBvji7Rbrs",
114 | "outputId": "31cefc83-90c1-4568-87b3-66e1bfba6384"
115 | },
116 | "outputs": [
117 | {
118 | "name": "stdout",
119 | "output_type": "stream",
120 | "text": [
121 | "Cloning into 'Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes'...\n",
122 | "remote: Enumerating objects: 100, done.\u001b[K\n",
123 | "remote: Counting objects: 100% (100/100), done.\u001b[K\n",
124 | "remote: Compressing objects: 100% (78/78), done.\u001b[K\n",
125 | "remote: Total 100 (delta 59), reused 38 (delta 21), pack-reused 0\u001b[K\n",
126 | "Receiving objects: 100% (100/100), 57.61 KiB | 14.40 MiB/s, done.\n",
127 | "Resolving deltas: 100% (59/59), done.\n"
128 | ]
129 | }
130 | ],
131 | "source": [
132 | "!git clone https://github.com/deep-diver/Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes.git"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": 58,
138 | "metadata": {
139 | "colab": {
140 | "base_uri": "https://localhost:8080/"
141 | },
142 | "id": "GDexvxTXRfWH",
143 | "outputId": "2bd5f4d9-588d-414e-e819-0f8969034e29"
144 | },
145 | "outputs": [
146 | {
147 | "data": {
148 | "text/plain": [
149 | "['frog_0000.jpg',\n",
150 | " 'truck_0000.jpg',\n",
151 | " 'dog_0000.jpg',\n",
152 | " 'cat_0000.jpg',\n",
153 | " 'ship_0000.jpg',\n",
154 | " 'deer_0000.jpg',\n",
155 | " 'bird_0000.jpg',\n",
156 | " 'horse_0000.jpg',\n",
157 | " 'automobile_0000.jpg',\n",
158 | " 'airplane_0000.jpg']"
159 | ]
160 | },
161 | "execution_count": 58,
162 | "metadata": {},
163 | "output_type": "execute_result"
164 | }
165 | ],
166 | "source": [
167 | "from os import listdir\n",
168 | "\n",
169 | "test_files = listdir(TEST_LOCAL_PATH)\n",
170 | "test_files"
171 | ]
172 | },
173 | {
174 | "cell_type": "markdown",
175 | "metadata": {},
176 | "source": [
177 | "## Create Import File to be Injected into Batch Prediction\n",
178 | "- Batch request input must follow one of the formats supported by Vertex AI Prediction. JSONL, TFRecord, CSV, and file list formats are available ([link](https://cloud.google.com/vertex-ai/docs/predictions/batch-predictions#batch_request_input)); the file list format is used in this notebook."
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": 64,
184 | "metadata": {
185 | "id": "OJR9wy6GXyJv"
186 | },
187 | "outputs": [],
188 | "source": [
189 | "f = open(TEST_FILENAME, \"w\")\n",
190 | "\n",
191 | "for filename in test_files:\n",
192 | " f.write(f\"{TEST_GCS_BUCKET}/{filename}\\n\")\n",
193 | "\n",
194 | "f.close()"
195 | ]
196 | },
197 | {
198 | "cell_type": "code",
199 | "execution_count": 65,
200 | "metadata": {
201 | "colab": {
202 | "base_uri": "https://localhost:8080/"
203 | },
204 | "id": "mmRePB6aYRfR",
205 | "outputId": "2ea2566d-a1a0-4dae-c7ef-6152ea6f79cc"
206 | },
207 | "outputs": [
208 | {
209 | "name": "stdout",
210 | "output_type": "stream",
211 | "text": [
212 | "gs://batch-prediction-collection/frog_0000.jpg\n",
213 | "gs://batch-prediction-collection/truck_0000.jpg\n",
214 | "gs://batch-prediction-collection/dog_0000.jpg\n",
215 | "gs://batch-prediction-collection/cat_0000.jpg\n",
216 | "gs://batch-prediction-collection/ship_0000.jpg\n",
217 | "gs://batch-prediction-collection/deer_0000.jpg\n",
218 | "gs://batch-prediction-collection/bird_0000.jpg\n",
219 | "gs://batch-prediction-collection/horse_0000.jpg\n",
220 | "gs://batch-prediction-collection/automobile_0000.jpg\n",
221 | "gs://batch-prediction-collection/airplane_0000.jpg\n"
222 | ]
223 | }
224 | ],
225 | "source": [
226 | "!cat {TEST_FILENAME}"
227 | ]
228 | },
229 | {
230 | "cell_type": "markdown",
231 | "metadata": {},
232 | "source": [
233 | "## Copy Test Images and Import File to GCS Bucket"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": 66,
239 | "metadata": {
240 | "colab": {
241 | "base_uri": "https://localhost:8080/"
242 | },
243 | "id": "WdeuJt5YYVPw",
244 | "outputId": "d34b6904-a954-4a25-ba64-58d560f063f3"
245 | },
246 | "outputs": [
247 | {
248 | "name": "stdout",
249 | "output_type": "stream",
250 | "text": [
251 | "Copying file://test-images.txt [Content-Type=text/plain]...\n",
252 | "/ [0/1 files][ 0.0 B/ 480.0 B] 0% Done \r",
253 | "/ [1/1 files][ 480.0 B/ 480.0 B] 100% Done \r\n",
254 | "Operation completed over 1 objects/480.0 B. \n",
255 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/dog_0000.jpg [Content-Type=image/jpeg]...\n",
256 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/automobile_0000.jpg [Content-Type=image/jpeg]...\n",
257 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/cat_0000.jpg [Content-Type=image/jpeg]...\n",
258 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/frog_0000.jpg [Content-Type=image/jpeg]...\n",
259 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/bird_0000.jpg [Content-Type=image/jpeg]...\n",
260 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/airplane_0000.jpg [Content-Type=image/jpeg]...\n",
261 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/horse_0000.jpg [Content-Type=image/jpeg]...\n",
262 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/deer_0000.jpg [Content-Type=image/jpeg]...\n",
263 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/truck_0000.jpg [Content-Type=image/jpeg]...\n",
264 | "Copying file://Continuous-Adaptation-for-Machine-Learning-System-to-Data-Changes/notebooks/test-images/ship_0000.jpg [Content-Type=image/jpeg]...\n",
265 | "/ [10/10 files][ 9.2 KiB/ 9.2 KiB] 100% Done \n",
266 | "Operation completed over 10 objects/9.2 KiB. \n"
267 | ]
268 | }
269 | ],
270 | "source": [
271 | "!gsutil -m cp -r {TEST_FILENAME} {TEST_GCS_BUCKET}\n",
272 | "!gsutil -m cp -r {TEST_LOCAL_PATH}/*.jpg {TEST_GCS_BUCKET}"
273 | ]
274 | },
275 | {
276 | "cell_type": "markdown",
277 | "metadata": {
278 | "id": "-mVFSRUQHdEj"
279 | },
280 | "source": [
281 | "## Batch Prediction"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": 68,
287 | "metadata": {
288 | "id": "z_0QXPfcHc3p"
289 | },
290 | "outputs": [],
291 | "source": [
292 | "import google.cloud.aiplatform as aiplatform\n",
293 | "from typing import Union, Sequence\n",
294 | "\n",
295 | "\n",
296 | "def create_batch_prediction_job_dedicated_resources_sample(\n",
297 | " project: str,\n",
298 | " location: str,\n",
299 | " model_resource_name: str,\n",
300 | " job_display_name: str,\n",
301 | " gcs_source: Union[str, Sequence[str]],\n",
302 | " gcs_destination: str,\n",
303 | " instances_format: str = \"file-list\",\n",
304 | " machine_type: str = \"n1-standard-2\",\n",
305 | " accelerator_count: int = 1,\n",
306 | " accelerator_type: str = \"NVIDIA_TESLA_K80\",\n",
307 | " starting_replica_count: int = 1,\n",
308 | " max_replica_count: int = 1,\n",
309 | " sync: bool = True,\n",
310 | "):\n",
311 | " aiplatform.init(project=project, location=location)\n",
312 | "\n",
313 | " my_model = aiplatform.Model(model_resource_name)\n",
314 | "\n",
315 | " batch_prediction_job = my_model.batch_predict(\n",
316 | " job_display_name=job_display_name,\n",
317 | " instances_format=instances_format,\n",
318 | " gcs_source=gcs_source,\n",
319 | " gcs_destination_prefix=gcs_destination,\n",
320 | " machine_type=machine_type,\n",
321 | " accelerator_count=accelerator_count,\n",
322 | " accelerator_type=accelerator_type,\n",
323 | " starting_replica_count=starting_replica_count,\n",
324 | " max_replica_count=max_replica_count,\n",
325 | " sync=sync,\n",
326 | " )\n",
327 | "\n",
328 | " batch_prediction_job.wait()\n",
329 | "\n",
330 | " print(batch_prediction_job.display_name)\n",
331 | " print(batch_prediction_job.resource_name)\n",
332 | " print(batch_prediction_job.state)\n",
333 | " return batch_prediction_job"
334 | ]
335 | },
336 | {
337 | "cell_type": "code",
338 | "execution_count": 69,
339 | "metadata": {
340 | "id": "BkAJqucqaw_r"
341 | },
342 | "outputs": [],
343 | "source": [
344 | "from datetime import datetime\n",
345 | "\n",
346 | "TIMESTAMP = datetime.now().strftime(\"%Y%m%d%H%M%S\")"
347 | ]
348 | },
349 | {
350 | "cell_type": "code",
351 | "execution_count": 70,
352 | "metadata": {
353 | "colab": {
354 | "base_uri": "https://localhost:8080/"
355 | },
356 | "id": "Hb1g6qdNLTfP",
357 | "outputId": "442d7ada-b10f-456b-b2e4-7ccb7df86e5a"
358 | },
359 | "outputs": [
360 | {
361 | "name": "stdout",
362 | "output_type": "stream",
363 | "text": [
364 | "INFO:google.cloud.aiplatform.jobs:Creating BatchPredictionJob\n",
365 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob created. Resource name: projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104\n",
366 | "INFO:google.cloud.aiplatform.jobs:To use this BatchPredictionJob in another session:\n",
367 | "INFO:google.cloud.aiplatform.jobs:bpj = aiplatform.BatchPredictionJob('projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104')\n",
368 | "INFO:google.cloud.aiplatform.jobs:View Batch Prediction Job:\n",
369 | "https://console.cloud.google.com/ai/platform/locations/us-central1/batch-predictions/1680882799009071104?project=31482268105\n",
370 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104 current state:\n",
371 | "JobState.JOB_STATE_RUNNING\n",
372 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104 current state:\n",
373 | "JobState.JOB_STATE_RUNNING\n",
374 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104 current state:\n",
375 | "JobState.JOB_STATE_RUNNING\n",
376 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104 current state:\n",
377 | "JobState.JOB_STATE_RUNNING\n",
378 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104 current state:\n",
379 | "JobState.JOB_STATE_RUNNING\n",
380 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104 current state:\n",
381 | "JobState.JOB_STATE_RUNNING\n",
382 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104 current state:\n",
383 | "JobState.JOB_STATE_RUNNING\n",
384 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104 current state:\n",
385 | "JobState.JOB_STATE_RUNNING\n",
386 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104 current state:\n",
387 | "JobState.JOB_STATE_SUCCEEDED\n",
388 | "INFO:google.cloud.aiplatform.jobs:BatchPredictionJob run completed. Resource name: projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104\n",
389 | "resnet_cifar_latest-20210917022404\n",
390 | "projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104\n",
391 | "JobState.JOB_STATE_SUCCEEDED\n"
392 | ]
393 | },
394 | {
395 | "data": {
396 | "text/plain": [
397 | " \n",
398 | "resource name: projects/31482268105/locations/us-central1/batchPredictionJobs/1680882799009071104"
399 | ]
400 | },
401 | "execution_count": 70,
402 | "metadata": {},
403 | "output_type": "execute_result"
404 | }
405 | ],
406 | "source": [
407 | "create_batch_prediction_job_dedicated_resources_sample(\n",
408 | " project=GOOGLE_CLOUD_PROJECT,\n",
409 | " location=GOOGLE_CLOUD_REGION,\n",
410 | " model_resource_name=\"2008244793993330688\",\n",
411 | " job_display_name=f\"{MODEL_NAME}-{TIMESTAMP}\",\n",
412 | " gcs_source=[f\"{TEST_GCS_BUCKET}/{TEST_FILENAME}\"],\n",
413 | " gcs_destination=f\"{TEST_GCS_BUCKET}/results/\",\n",
414 | " accelerator_type=None,\n",
415 | " accelerator_count=None,\n",
416 | ")"
417 | ]
418 | },
419 | {
420 | "cell_type": "markdown",
421 | "metadata": {},
422 | "source": [
423 | "## Evaluate Batch Prediction"
424 | ]
425 | },
426 | {
427 | "cell_type": "code",
428 | "execution_count": 72,
429 | "metadata": {
430 | "colab": {
431 | "base_uri": "https://localhost:8080/"
432 | },
433 | "id": "b9Hz7CVqbE5o",
434 | "outputId": "08df5dd9-79b1-458b-aa63-b393eee60238"
435 | },
436 | "outputs": [
437 | {
438 | "name": "stdout",
439 | "output_type": "stream",
440 | "text": [
441 | "Copying gs://batch-prediction-collection/results/prediction-resnet_cifar_latest-2021_09_16T19_24_05_801Z/prediction.results-00000-of-00001...\n",
442 | "/ [0/6 files][ 0.0 B/ 1.3 KiB] 0% Done \r",
443 | "Copying gs://batch-prediction-collection/results/prediction-resnet_cifar_latest-2021_09_16T18_47_39_122Z/prediction.results-00000-of-00001...\n",
444 | "/ [0/6 files][ 0.0 B/ 1.3 KiB] 0% Done \r",
445 | "Copying gs://batch-prediction-collection/results/prediction-resnet_cifar_latest-2021_09_16T18_47_39_122Z/prediction.errors_stats-00000-of-00001...\n",
446 | "/ [0/6 files][ 0.0 B/ 1.3 KiB] 0% Done \r",
447 | "Copying gs://batch-prediction-collection/results/prediction-resnet_cifar_latest-2021_09_16T19_23_55_003Z/prediction.errors_stats-00000-of-00001...\n",
448 | "/ [0/6 files][ 0.0 B/ 1.3 KiB] 0% Done \r",
449 | "/ [1/6 files][ 0.0 B/ 1.3 KiB] 0% Done \r",
450 | "Copying gs://batch-prediction-collection/results/prediction-resnet_cifar_latest-2021_09_16T19_23_55_003Z/prediction.results-00000-of-00001...\n",
451 | "Copying gs://batch-prediction-collection/results/prediction-resnet_cifar_latest-2021_09_16T19_24_05_801Z/prediction.errors_stats-00000-of-00001...\n",
452 | "/ [6/6 files][ 1.3 KiB/ 1.3 KiB] 100% Done \n",
453 | "Operation completed over 6 objects/1.3 KiB. \n"
454 | ]
455 | }
456 | ],
457 | "source": [
458 | "import os\n",
459 | "import json\n",
460 | "\n",
461 | "RESULTS_DIRECTORY = \"results\"\n",
462 | "RESULTS_DIRECTORY_FULL = f'{TEST_GCS_BUCKET}/{RESULTS_DIRECTORY}'\n",
463 | "\n",
464 | "# Create missing directories\n",
465 | "os.makedirs(RESULTS_DIRECTORY, exist_ok=True)\n",
466 | "\n",
467 | "# Copy the batch prediction results from the GCS bucket into the local results directory\n",
468 | "!gsutil -m cp -r $RESULTS_DIRECTORY_FULL $RESULTS_DIRECTORY\n",
469 | "\n",
470 | "# Get most recently modified directory\n",
471 | "latest_directory = max(\n",
472 | " [\n",
473 | " os.path.join(RESULTS_DIRECTORY, d)\n",
474 | " for d in os.listdir(RESULTS_DIRECTORY)\n",
475 | " ],\n",
476 | " key=os.path.getmtime,\n",
477 | ")\n",
478 | "\n",
479 | "# Get downloaded results in directory\n",
480 | "results_files = []\n",
481 | "for dirpath, subdirs, files in os.walk(latest_directory):\n",
482 | " for file in files:\n",
483 | " if file.startswith(\"prediction.results\"):\n",
484 | " results_files.append(os.path.join(dirpath, file))\n",
485 | "\n",
486 | "# Consolidate all the results into a list\n",
487 | "results = []\n",
488 | "for results_file in results_files:\n",
489 | " # Parse each line of the downloaded results file as a JSON record\n",
490 | " with open(results_file, \"r\") as file:\n",
491 | " results.extend([json.loads(line) for line in file.readlines()])"
492 | ]
493 | },
494 | {
495 | "cell_type": "code",
496 | "execution_count": 73,
497 | "metadata": {
498 | "colab": {
499 | "base_uri": "https://localhost:8080/"
500 | },
501 | "id": "aCHYk8L0p6ED",
502 | "outputId": "ca9348c1-db4e-43e2-cf41-1a066d727389"
503 | },
504 | "outputs": [
505 | {
506 | "data": {
507 | "text/plain": [
508 | "[{'instance': 'gs://batch-prediction-collection/airplane_0000.jpg',\n",
509 | " 'prediction': {'confidence': 0.635806859, 'label': 'ship'}},\n",
510 | " {'instance': 'gs://batch-prediction-collection/cat_0000.jpg',\n",
511 | " 'prediction': {'confidence': 0.514597297, 'label': 'cat'}},\n",
512 | " {'instance': 'gs://batch-prediction-collection/ship_0000.jpg',\n",
513 | " 'prediction': {'confidence': 0.944843113, 'label': 'ship'}},\n",
514 | " {'instance': 'gs://batch-prediction-collection/bird_0000.jpg',\n",
515 | " 'prediction': {'confidence': 0.710508406, 'label': 'horse'}},\n",
516 | " {'instance': 'gs://batch-prediction-collection/truck_0000.jpg',\n",
517 | " 'prediction': {'confidence': 0.980968714, 'label': 'truck'}},\n",
518 | " {'instance': 'gs://batch-prediction-collection/frog_0000.jpg',\n",
519 | " 'prediction': {'confidence': 0.696931422, 'label': 'frog'}},\n",
520 | " {'instance': 'gs://batch-prediction-collection/dog_0000.jpg',\n",
521 | " 'prediction': {'confidence': 0.382295936, 'label': 'cat'}},\n",
522 | " {'instance': 'gs://batch-prediction-collection/deer_0000.jpg',\n",
523 | " 'prediction': {'confidence': 0.437720776, 'label': 'dog'}},\n",
524 | " {'instance': 'gs://batch-prediction-collection/automobile_0000.jpg',\n",
525 | " 'prediction': {'confidence': 0.460335433, 'label': 'automobile'}},\n",
526 | " {'instance': 'gs://batch-prediction-collection/horse_0000.jpg',\n",
527 | " 'prediction': {'confidence': 0.918733776, 'label': 'dog'}}]"
528 | ]
529 | },
530 | "execution_count": 73,
531 | "metadata": {},
532 | "output_type": "execute_result"
533 | }
534 | ],
535 | "source": [
536 | "results"
537 | ]
538 | },
539 | {
540 | "cell_type": "code",
541 | "execution_count": 83,
542 | "metadata": {
543 | "colab": {
544 | "base_uri": "https://localhost:8080/"
545 | },
546 | "id": "g-6UdwcKqDKO",
547 | "outputId": "4545ebf3-71fa-477f-8c51-1068a6d65226"
548 | },
549 | "outputs": [
550 | {
551 | "name": "stdout",
552 | "output_type": "stream",
553 | "text": [
554 | "label(airplane)/prediction(ship)\n",
555 | "label(cat)/prediction(cat)\n",
556 | "label(ship)/prediction(ship)\n",
557 | "label(bird)/prediction(horse)\n",
558 | "label(truck)/prediction(truck)\n",
559 | "label(frog)/prediction(frog)\n",
560 | "label(dog)/prediction(cat)\n",
561 | "label(deer)/prediction(dog)\n",
562 | "label(automobile)/prediction(automobile)\n",
563 | "label(horse)/prediction(dog)\n",
564 | "\n",
565 | "number of results: 10\n",
566 | "number of correct: 5\n",
567 | "Accuracy: 0.5\n"
568 | ]
569 | }
570 | ],
571 | "source": [
572 | "num_correct = 0\n",
573 | "\n",
574 | "for result in results:\n",
575 | " label = os.path.basename(result[\"instance\"]).split(\"_\")[0]\n",
576 | " prediction = result[\"prediction\"][\"label\"]\n",
577 | "\n",
578 | " print(f\"label({label})/prediction({prediction})\")\n",
579 | " if label == prediction:\n",
580 | " num_correct = num_correct + 1\n",
581 | "\n",
582 | "print()\n",
583 | "print(f\"number of results: {len(results)}\")\n",
584 | "print(f\"number of correct: {num_correct}\")\n",
585 | "print(f\"Accuracy: {num_correct/len(results)}\")"
586 | ]
587 | }
588 | ],
589 | "metadata": {
590 | "colab": {
591 | "include_colab_link": true,
592 | "name": "03_Batch_Prediction_Performance.ipynb",
593 | "provenance": []
594 | },
595 | "kernelspec": {
596 | "display_name": "Python 3 (ipykernel)",
597 | "language": "python",
598 | "name": "python3"
599 | },
600 | "language_info": {
601 | "codemirror_mode": {
602 | "name": "ipython",
603 | "version": 3
604 | },
605 | "file_extension": ".py",
606 | "mimetype": "text/x-python",
607 | "name": "python",
608 | "nbconvert_exporter": "python",
609 | "pygments_lexer": "ipython3",
610 | "version": "3.8.2"
611 | }
612 | },
613 | "nbformat": 4,
614 | "nbformat_minor": 2
615 | }
616 |
--------------------------------------------------------------------------------
/notebooks/04_Cloud_Scheduler_Trigger.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "view-in-github"
8 | },
9 | "source": [
10 | "
"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {
16 | "id": "T_WNxUbMmgfw"
17 | },
18 | "source": [
19 | "# Outline"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {
25 | "id": "V9JhU_tzmgfz"
26 | },
27 | "source": [
28 | "1. Create Pub/Sub Topic ([reference](https://github.com/sayakpaul/CI-CD-for-Model-Training/blob/main/cloud_function_trigger.ipynb))\n",
29 | "2. Deploy Cloud Function ([reference](https://github.com/sayakpaul/CI-CD-for-Model-Training/blob/main/cloud_function_trigger.ipynb))\n",
30 | " - checks whether there are enough images in a specific GCS bucket\n",
31 | "3. Publish to the Pub/Sub Topic to trigger the batch prediction pipeline ([reference](https://github.com/sayakpaul/CI-CD-for-Model-Training/blob/main/cloud_scheduler_trigger.ipynb))\n",
32 | " - requires the pipeline JSON spec to be stored in GCS"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {
38 | "id": "Iva2o8C-mujw"
39 | },
40 | "source": [
41 | "# Setup"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 1,
47 | "metadata": {
48 | "colab": {
49 | "base_uri": "https://localhost:8080/"
50 | },
51 | "id": "2A7-ml0Yt6lX",
52 | "outputId": "8f368fdf-1a56-4440-91d3-0c766d5ed369"
53 | },
54 | "outputs": [
55 | {
56 | "name": "stdout",
57 | "output_type": "stream",
58 | "text": [
59 | "\u001b[?25l\r",
60 | "\u001b[K |███▌ | 10 kB 30.5 MB/s eta 0:00:01\r",
61 | "\u001b[K |███████ | 20 kB 21.4 MB/s eta 0:00:01\r",
62 | "\u001b[K |██████████▍ | 30 kB 16.3 MB/s eta 0:00:01\r",
63 | "\u001b[K |█████████████▉ | 40 kB 14.2 MB/s eta 0:00:01\r",
64 | "\u001b[K |█████████████████▍ | 51 kB 6.6 MB/s eta 0:00:01\r",
65 | "\u001b[K |████████████████████▉ | 61 kB 7.0 MB/s eta 0:00:01\r",
66 | "\u001b[K |████████████████████████▎ | 71 kB 7.5 MB/s eta 0:00:01\r",
67 | "\u001b[K |███████████████████████████▊ | 81 kB 8.4 MB/s eta 0:00:01\r",
68 | "\u001b[K |███████████████████████████████▏| 92 kB 8.6 MB/s eta 0:00:01\r",
69 | "\u001b[K |████████████████████████████████| 94 kB 2.9 MB/s \n",
70 | "\u001b[?25h\u001b[?25l\r",
71 | "\u001b[K |███████▍ | 10 kB 38.2 MB/s eta 0:00:01\r",
72 | "\u001b[K |██████████████▉ | 20 kB 43.9 MB/s eta 0:00:01\r",
73 | "\u001b[K |██████████████████████▎ | 30 kB 51.4 MB/s eta 0:00:01\r",
74 | "\u001b[K |█████████████████████████████▊ | 40 kB 54.8 MB/s eta 0:00:01\r",
75 | "\u001b[K |████████████████████████████████| 44 kB 2.9 MB/s \n",
76 | "\u001b[?25h"
77 | ]
78 | }
79 | ],
80 | "source": [
81 | "!pip install --upgrade -q google-cloud-scheduler"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {
88 | "id": "qJLNWsPamwNw"
89 | },
90 | "outputs": [],
91 | "source": [
92 | "!gcloud init"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": 3,
98 | "metadata": {
99 | "id": "TRkHpHnQmzof"
100 | },
101 | "outputs": [],
102 | "source": [
103 | "from google.colab import auth\n",
104 | "\n",
105 | "auth.authenticate_user()"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": 43,
111 | "metadata": {
112 | "id": "WH4ZvM_3m397"
113 | },
114 | "outputs": [],
115 | "source": [
116 | "GOOGLE_CLOUD_PROJECT = \"gcp-ml-172005\" # @param {type:\"string\"}\n",
117 | "GOOGLE_CLOUD_REGION = \"us-central1\"\n",
118 | "\n",
119 | "GCS_BUCKET_NAME = \"cifar10-experimental-csp2\" # @param {type:\"string\"}\n",
120 | "PIPELINE_NAME = \"continuous-adaptation-for-data-changes-batch\" # @param {type:\"string\"}\n",
121 | "PIPELINE_ROOT = \"gs://{}/pipeline_root/{}\".format(GCS_BUCKET_NAME, PIPELINE_NAME)\n",
122 | "PIPELINE_LOCATION = f\"{PIPELINE_ROOT}/{PIPELINE_NAME}_pipeline.json\"\n",
123 | "\n",
124 | "PUBSUB_TOPIC = f\"trigger-{PIPELINE_NAME}\"\n",
125 | "\n",
126 | "SCHEDULER_JOB_NAME = f\"scheduler-job-{PUBSUB_TOPIC}\"\n",
127 | "\n",
128 | "IMAGE_LOCATION_BUCKET = \"batch-prediction-collection-3\" # @param {type:\"string\"}"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": 44,
134 | "metadata": {
135 | "colab": {
136 | "base_uri": "https://localhost:8080/",
137 | "height": 35
138 | },
139 | "id": "kpidhm309Ed9",
140 | "outputId": "fadb39ba-d84e-462d-803c-fce387229362"
141 | },
142 | "outputs": [
143 | {
144 | "data": {
145 | "application/vnd.google.colaboratory.intrinsic+json": {
146 | "type": "string"
147 | },
148 | "text/plain": [
149 | "'batch-prediction-collection-3'"
150 | ]
151 | },
152 | "execution_count": 44,
153 | "metadata": {},
154 | "output_type": "execute_result"
155 | }
156 | ],
157 | "source": [
158 | "IMAGE_LOCATION_BUCKET"
159 | ]
160 | },
161 | {
162 | "cell_type": "markdown",
163 | "metadata": {
164 | "id": "3622c4BaodLT"
165 | },
166 | "source": [
167 | "# Create Pub/Sub Topic"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": null,
173 | "metadata": {
174 | "colab": {
175 | "base_uri": "https://localhost:8080/"
176 | },
177 | "id": "_1RNMdR-ofBn",
178 | "outputId": "b53dac52-ab2b-49ad-d92c-eec4782fad1b"
179 | },
180 | "outputs": [
181 | {
182 | "name": "stdout",
183 | "output_type": "stream",
184 | "text": [
185 | "Created topic [projects/gcp-ml-172005/topics/trigger-continuous-adaptation-for-data-changes-batch].\n"
186 | ]
187 | }
188 | ],
189 | "source": [
190 | "!gcloud pubsub topics create {PUBSUB_TOPIC}"
191 | ]
192 | },
193 | {
194 | "cell_type": "markdown",
195 | "metadata": {
196 | "id": "s2ISbBXvoiN7"
197 | },
198 | "source": [
199 | "# Deploy Cloud Function"
200 | ]
201 | },
202 | {
203 | "cell_type": "markdown",
204 | "metadata": {
205 | "id": "TeYmsiYAqroy"
206 | },
207 | "source": [
208 | "### Create Cloud Function Directory"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": 6,
214 | "metadata": {
215 | "id": "y_hJOA47prkp"
216 | },
217 | "outputs": [],
218 | "source": [
219 | "!mkdir -p cloud_function\n",
220 | "!touch cloud_function/__init__.py"
221 | ]
222 | },
223 | {
224 | "cell_type": "markdown",
225 | "metadata": {
226 | "id": "p3gLM4SQqxMa"
227 | },
228 | "source": [
229 | "### Create Requirements.txt"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": 7,
235 | "metadata": {
236 | "id": "o-7JN3huqdEO"
237 | },
238 | "outputs": [],
239 | "source": [
240 | "_cloud_function_dep = \"cloud_function/requirements.txt\""
241 | ]
242 | },
243 | {
244 | "cell_type": "code",
245 | "execution_count": 8,
246 | "metadata": {
247 | "colab": {
248 | "base_uri": "https://localhost:8080/"
249 | },
250 | "id": "W9a9myWFqj14",
251 | "outputId": "ce6ef3f5-29dc-4651-ee62-167fb95d4634"
252 | },
253 | "outputs": [
254 | {
255 | "name": "stdout",
256 | "output_type": "stream",
257 | "text": [
258 | "Writing cloud_function/requirements.txt\n"
259 | ]
260 | }
261 | ],
262 | "source": [
263 | "%%writefile {_cloud_function_dep}\n",
264 | "\n",
265 | "kfp==1.6.2\n",
266 | "google-cloud-aiplatform\n",
267 | "google-cloud-storage"
268 | ]
269 | },
270 | {
271 | "cell_type": "markdown",
272 | "metadata": {
273 | "id": "WhaDWKaRqzzH"
274 | },
275 | "source": [
276 | "### Create Cloud Function Module"
277 | ]
278 | },
279 | {
280 | "cell_type": "code",
281 | "execution_count": 9,
282 | "metadata": {
283 | "colab": {
284 | "base_uri": "https://localhost:8080/"
285 | },
286 | "id": "npdLDFazZX0v",
287 | "outputId": "dead3d54-31ab-4588-bb98-ee1e6b5610cb"
288 | },
289 | "outputs": [
290 | {
291 | "name": "stdout",
292 | "output_type": "stream",
293 | "text": [
294 | "Requirement already satisfied: google-cloud-storage in /usr/local/lib/python3.7/dist-packages (1.18.1)\n",
295 | "Requirement already satisfied: google-resumable-media<0.5.0dev,>=0.3.1 in /usr/local/lib/python3.7/dist-packages (from google-cloud-storage) (0.4.1)\n",
296 | "Requirement already satisfied: google-auth>=1.2.0 in /usr/local/lib/python3.7/dist-packages (from google-cloud-storage) (1.35.0)\n",
297 | "Requirement already satisfied: google-cloud-core<2.0dev,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from google-cloud-storage) (1.0.3)\n",
298 | "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.7/dist-packages (from google-auth>=1.2.0->google-cloud-storage) (4.7.2)\n",
299 | "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.7/dist-packages (from google-auth>=1.2.0->google-cloud-storage) (1.15.0)\n",
300 | "Requirement already satisfied: setuptools>=40.3.0 in /usr/local/lib/python3.7/dist-packages (from google-auth>=1.2.0->google-cloud-storage) (57.4.0)\n",
301 | "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth>=1.2.0->google-cloud-storage) (4.2.4)\n",
302 | "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth>=1.2.0->google-cloud-storage) (0.2.8)\n",
303 | "Requirement already satisfied: google-api-core<2.0.0dev,>=1.14.0 in /usr/local/lib/python3.7/dist-packages (from google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (1.26.3)\n",
304 | "Requirement already satisfied: protobuf>=3.12.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (3.17.3)\n",
305 | "Requirement already satisfied: requests<3.0.0dev,>=2.18.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (2.23.0)\n",
306 | "Requirement already satisfied: packaging>=14.3 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (21.0)\n",
307 | "Requirement already satisfied: pytz in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (2018.9)\n",
308 | "Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.6.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (1.53.0)\n",
309 | "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=14.3->google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (2.4.7)\n",
310 | "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.7/dist-packages (from pyasn1-modules>=0.2.1->google-auth>=1.2.0->google-cloud-storage) (0.4.8)\n",
311 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (2021.5.30)\n",
312 | "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (3.0.4)\n",
313 | "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (2.10)\n",
314 | "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.0->google-cloud-storage) (1.24.3)\n"
315 | ]
316 | }
317 | ],
318 | "source": [
319 | "!pip install google-cloud-storage"
320 | ]
321 | },
322 | {
323 | "cell_type": "code",
324 | "execution_count": 40,
325 | "metadata": {
326 | "id": "OwldC4ntpD7Z"
327 | },
328 | "outputs": [],
329 | "source": [
330 | "_cloud_function_file = \"cloud_function/main.py\""
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 57,
336 | "metadata": {
337 | "colab": {
338 | "base_uri": "https://localhost:8080/"
339 | },
340 | "id": "59_-cyfIonqP",
341 | "outputId": "1ef71f8a-dc15-48ed-dbcb-97eff5cd7531"
342 | },
343 | "outputs": [
344 | {
345 | "name": "stdout",
346 | "output_type": "stream",
347 | "text": [
348 | "Overwriting cloud_function/main.py\n"
349 | ]
350 | }
351 | ],
352 | "source": [
353 | "%%writefile {_cloud_function_file}\n",
354 | "\n",
355 | "import os\n",
356 | "import re\n",
357 | "import json\n",
358 | "import logging\n",
359 | "import base64\n",
360 | "\n",
361 | "from datetime import datetime\n",
362 | "\n",
363 | "from kfp.v2.google.client import AIPlatformClient\n",
364 | "from google.cloud import storage\n",
365 | "\n",
366 | "\n",
367 | "def get_number_of_images(storage_client, bucket, latest_directory):\n",
368 | " blobs = storage_client.list_blobs(bucket, prefix=latest_directory)\n",
369 | "\n",
370 | " count = 0\n",
371 | " for blob in blobs:\n",
372 | " if blob.name.split(\".\")[-1] == \"jpg\":\n",
373 | " count = count + 1\n",
374 | "\n",
375 | " return count\n",
376 | "\n",
377 | "\n",
378 | "def is_there_enough_images(storage_client, bucket, latest_directory, threshold):\n",
379 | " number_of_images = get_number_of_images(storage_client, bucket, latest_directory)\n",
380 | " print(f\"number of images = {number_of_images}\")\n",
381 | " return number_of_images >= threshold\n",
382 | "\n",
383 | "\n",
384 | "def get_latest_directory(storage_client, bucket):\n",
385 | " blobs = storage_client.list_blobs(bucket)\n",
386 | "\n",
387 | " folders = list(\n",
388 | " set(\n",
389 | " [\n",
390 | " os.path.dirname(blob.name)\n",
391 | " for blob in blobs\n",
392 | " if bool(\n",
393 | " re.match(\n",
394 | " \"[1-9][0-9][0-9][0-9]-[0-1][0-9]\", os.path.dirname(blob.name)\n",
395 | " )\n",
396 | " )\n",
397 | " is True\n",
398 | " ]\n",
399 | " )\n",
400 | " )\n",
401 | "\n",
402 | " folders.sort(key=lambda date: datetime.strptime(date, \"%Y-%m\"))\n",
403 | " print(folders[0])\n",
404 | " return folders[0]\n",
405 | "\n",
406 | "\n",
407 | "def trigger_pipeline(event, context):\n",
408 | " # Parse the environment variables.\n",
409 | " project = os.getenv(\"PROJECT\")\n",
410 | " region = os.getenv(\"REGION\")\n",
411 | " gcs_pipeline_file_location = os.getenv(\"GCS_PIPELINE_FILE_LOCATION\")\n",
412 | " gcs_image_bucket = os.getenv(\"GCS_IMAGE_BUCKET\")\n",
413 | "\n",
414 | " print(project)\n",
415 | " print(region)\n",
416 | " print(gcs_pipeline_file_location)\n",
417 | " print(gcs_image_bucket)\n",
418 | "\n",
419 | " threshold = 100\n",
420 | "\n",
421 | " # Create a GCS client and find the dated image directory to inspect.\n",
422 | " storage_client = storage.Client()\n",
423 | " latest_directory = get_latest_directory(storage_client, gcs_image_bucket)\n",
424 | "\n",
425 | " if is_there_enough_images(\n",
426 | " storage_client, gcs_image_bucket, latest_directory, threshold\n",
427 | " ):\n",
428 | " path_parts = gcs_pipeline_file_location.replace(\"gs://\", \"\").split(\"/\")\n",
429 | " pipeline_bucket = path_parts[0]\n",
430 | " pipeline_blob = \"/\".join(path_parts[1:])\n",
431 | "\n",
432 | " pipeline_bucket = storage_client.bucket(pipeline_bucket)\n",
433 | " blob = storage.Blob(bucket=pipeline_bucket, name=pipeline_blob)\n",
434 | "\n",
435 | " if not blob.exists(storage_client):\n",
436 | " raise ValueError(f\"{gcs_pipeline_file_location} does not exist.\")\n",
437 | "\n",
438 | " # Initialize Vertex AI API client and submit for pipeline execution.\n",
439 | " api_client = AIPlatformClient(project_id=project, region=region)\n",
440 | "\n",
441 | " response = api_client.create_run_from_job_spec(\n",
442 | " job_spec_path=gcs_pipeline_file_location,\n",
443 | " parameter_values={\"data_gcs_prefix\": latest_directory},\n",
444 | " enable_caching=True,\n",
445 | " )\n",
446 | "\n",
447 | " logging.info(response)"
448 | ]
449 | },
450 | {
451 | "cell_type": "markdown",
452 | "metadata": {
453 | "id": "kUjuI5z7rH0Z"
454 | },
455 | "source": [
456 | "### Deploy Cloud Function"
457 | ]
458 | },
459 | {
460 | "cell_type": "code",
461 | "execution_count": 58,
462 | "metadata": {
463 | "id": "8zjMovG1WyIV"
464 | },
465 | "outputs": [],
466 | "source": [
467 | "!cd cloud_function"
468 | ]
469 | },
470 | {
471 | "cell_type": "code",
472 | "execution_count": 59,
473 | "metadata": {
474 | "colab": {
475 | "base_uri": "https://localhost:8080/"
476 | },
477 | "id": "lAcMJL-9qpoP",
478 | "outputId": "bf56830a-e1c4-460d-c50a-5be902669698"
479 | },
480 | "outputs": [
481 | {
482 | "name": "stdout",
483 | "output_type": "stream",
484 | "text": [
485 | "PROJECT=gcp-ml-172005,REGION=us-central1,GCS_PIPELINE_FILE_LOCATION=gs://cifar10-experimental-csp2/pipeline_root/continuous-adaptation-for-data-changes-batch/continuous-adaptation-for-data-changes-batch_pipeline.json,GCS_IMAGE_BUCKET=batch-prediction-collection-3\n"
486 | ]
487 | }
488 | ],
489 | "source": [
490 | "ENV_VARS=f\"\"\"\\\n",
491 | "PROJECT={GOOGLE_CLOUD_PROJECT},\\\n",
492 | "REGION={GOOGLE_CLOUD_REGION},\\\n",
493 | "GCS_PIPELINE_FILE_LOCATION={PIPELINE_LOCATION},\\\n",
494 | "GCS_IMAGE_BUCKET={IMAGE_LOCATION_BUCKET}\n",
495 | "\"\"\"\n",
496 | "\n",
497 | "!echo {ENV_VARS}"
498 | ]
499 | },
500 | {
501 | "cell_type": "code",
502 | "execution_count": 60,
503 | "metadata": {
504 | "colab": {
505 | "base_uri": "https://localhost:8080/"
506 | },
507 | "id": "bdw7HUcSpBU7",
508 | "outputId": "ca11ac12-819d-4d3d-9c8a-5a1f9bc1a1b6"
509 | },
510 | "outputs": [
511 | {
512 | "name": "stdout",
513 | "output_type": "stream",
514 | "text": [
515 | "\n",
516 | "For Cloud Build Logs, visit: https://console.cloud.google.com/cloud-build/builds;region=us-central1/b6851c4a-a1a5-47c2-8cb1-a22f927867ad?project=874401645461\n",
517 | "availableMemoryMb: 256\n",
518 | "buildId: 5dea4cb3-7602-4c7e-992a-2ddcaa5566bd\n",
519 | "buildName: projects/874401645461/locations/us-central1/builds/5dea4cb3-7602-4c7e-992a-2ddcaa5566bd\n",
520 | "entryPoint: trigger_pipeline\n",
521 | "environmentVariables:\n",
522 | " GCS_IMAGE_BUCKET: batch-prediction-collection-3\n",
523 | " GCS_IMAGE_FILE_LOCATION: gs://batch-prediction-collection-3\n",
524 | " GCS_PIPELINE_FILE_LOCATION: gs://cifar10-experimental-csp2/pipeline_root/continuous-adaptation-for-data-changes-batch/continuous-adaptation-for-data-changes-batch_pipeline.json\n",
525 | " PROJECT: gcp-ml-172005\n",
526 | " REGION: us-central1\n",
527 | "eventTrigger:\n",
528 | " eventType: google.pubsub.topic.publish\n",
529 | " failurePolicy: {}\n",
530 | " resource: projects/gcp-ml-172005/topics/trigger-continuous-adaptation-for-data-changes-batch\n",
531 | " service: pubsub.googleapis.com\n",
532 | "ingressSettings: ALLOW_ALL\n",
533 | "labels:\n",
534 | " deployment-tool: cli-gcloud\n",
535 | "name: projects/gcp-ml-172005/locations/us-central1/functions/trigger-continuous-adaptation-for-data-changes-batch-fn\n",
536 | "runtime: python37\n",
537 | "serviceAccountEmail: gcp-ml-172005@appspot.gserviceaccount.com\n",
538 | "sourceArchiveUrl: gs://cifar10-experimental-csp2/us-central1-projects/gcp-ml-172005/locations/us-central1/functions/trigger-continuous-adaptation-for-data-changes-batch-fn-rkbjepcrnujy.zip\n",
539 | "status: ACTIVE\n",
540 | "timeout: 60s\n",
541 | "updateTime: '2021-10-19T00:48:41.632Z'\n",
542 | "versionId: '14'\n"
543 | ]
544 | }
545 | ],
546 | "source": [
547 | "BUCKET = f'gs://{GCS_BUCKET_NAME}'\n",
548 | "CLOUD_FUNCTION_NAME = f'trigger-{PIPELINE_NAME}-fn'\n",
549 | "\n",
550 | "!gcloud functions deploy {CLOUD_FUNCTION_NAME} \\\n",
551 | " --region={GOOGLE_CLOUD_REGION} \\\n",
552 | " --trigger-topic={PUBSUB_TOPIC} \\\n",
553 | " --runtime=python37 \\\n",
554 | " --source=cloud_function\\\n",
555 | " --entry-point=trigger_pipeline\\\n",
556 | " --stage-bucket={BUCKET}\\\n",
557 | " --update-env-vars={ENV_VARS}"
558 | ]
559 | },
560 | {
561 | "cell_type": "markdown",
562 | "metadata": {
563 | "id": "NEZIpLjNrNe6"
564 | },
565 | "source": [
566 | "### See the Progress"
567 | ]
568 | },
569 | {
570 | "cell_type": "code",
571 | "execution_count": null,
572 | "metadata": {
573 | "colab": {
574 | "base_uri": "https://localhost:8080/",
575 | "height": 34
576 | },
577 | "id": "WOudc6YvrPZA",
578 | "outputId": "9c70e204-86c8-44f7-db60-60cf1985aa7b"
579 | },
580 | "outputs": [
581 | {
582 | "data": {
583 | "text/html": [
584 | "See the Cloud Function details here."
585 | ],
586 | "text/plain": [
587 | ""
588 | ]
589 | },
590 | "metadata": {},
591 | "output_type": "display_data"
592 | }
593 | ],
594 | "source": [
595 | "import IPython\n",
596 | "\n",
597 | "cloud_fn_url = f\"https://console.cloud.google.com/functions/details/{GOOGLE_CLOUD_REGION}/{CLOUD_FUNCTION_NAME}\"\n",
598 | "html = (\n",
599 | " f'See the Cloud Function details here.'\n",
600 | ")\n",
601 | "IPython.display.display(IPython.display.HTML(html))"
602 | ]
603 | },
604 | {
605 | "cell_type": "markdown",
606 | "metadata": {
607 | "id": "iknpM94_tnOc"
608 | },
609 | "source": [
610 | "# Create Cloud Scheduler's Job"
611 | ]
612 | },
613 | {
614 | "cell_type": "code",
615 | "execution_count": null,
616 | "metadata": {
617 | "colab": {
618 | "base_uri": "https://localhost:8080/"
619 | },
620 | "id": "7dx03Q6Qt0n4",
621 | "outputId": "d8a6f937-a4fc-4a60-859c-6fef96ae4df2"
622 | },
623 | "outputs": [
624 | {
625 | "name": "stdout",
626 | "output_type": "stream",
627 | "text": [
628 | "name: projects/gcp-ml-172005/locations/us-central1/jobs/scheduler-job-trigger-continuous-adaptation-for-data-changes-batch\n",
629 | "pubsubTarget:\n",
630 | " attributes:\n",
631 | " name: scheduler\n",
632 | " topicName: projects/gcp-ml-172005/topics/trigger-continuous-adaptation-for-data-changes-batch\n",
633 | "retryConfig:\n",
634 | " maxBackoffDuration: 3600s\n",
635 | " maxDoublings: 16\n",
636 | " maxRetryDuration: 0s\n",
637 | " minBackoffDuration: 5s\n",
638 | "schedule: '*/3 * * * *'\n",
639 | "state: ENABLED\n",
640 | "timeZone: Etc/UTC\n",
641 | "userUpdateTime: '2021-10-18T01:10:04Z'\n"
642 | ]
643 | }
644 | ],
645 | "source": [
646 | "!gcloud scheduler jobs create pubsub $SCHEDULER_JOB_NAME --schedule \"*/3 * * * *\" --topic $PUBSUB_TOPIC --attributes name=scheduler # every 3 minutes"
647 | ]
648 | }
649 | ],
650 | "metadata": {
651 | "colab": {
652 | "include_colab_link": true,
653 | "name": "04_Cloud_Scheduler_Trigger.ipynb",
654 | "provenance": []
655 | },
656 | "kernelspec": {
657 | "display_name": "Python 3 (ipykernel)",
658 | "language": "python",
659 | "name": "python3"
660 | },
661 | "language_info": {
662 | "codemirror_mode": {
663 | "name": "ipython",
664 | "version": 3
665 | },
666 | "file_extension": ".py",
667 | "mimetype": "text/x-python",
668 | "name": "python",
669 | "nbconvert_exporter": "python",
670 | "pygments_lexer": "ipython3",
671 | "version": "3.8.2"
672 | }
673 | },
674 | "nbformat": 4,
675 | "nbformat_minor": 1
676 | }
677 |
--------------------------------------------------------------------------------
/notebooks/02_TFX_Training_Pipeline.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "view-in-github"
8 | },
9 | "source": [
10 | "
"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {
16 | "id": "mTVp-9PGYFIO"
17 | },
18 | "source": [
19 | "This notebook assumes you are familiar with the basics of Vertex AI, TFX (especially custom components), and TensorFlow. "
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {
25 | "id": "W7gJqmqrsfqh"
26 | },
27 | "source": [
28 | "## References\n",
29 | "\n",
30 | "This notebook refers to the following resources and also reuses parts of the code from there: \n",
31 | "* [Simple TFX Pipeline for Vertex Pipelines](https://colab.research.google.com/github/tensorflow/tfx/blob/master/docs/tutorials/tfx/gcp/vertex_pipelines_simple.ipynb)\n",
32 | "* [Vertex AI Training with TFX and Vertex Pipelines](https://www.tensorflow.org/tfx/tutorials/tfx/gcp/vertex_pipelines_vertex_training)\n",
33 | "* [Importing models to Vertex AI](https://cloud.google.com/vertex-ai/docs/general/import-model)\n",
34 | "* [Deploying a model using the Vertex AI API](https://cloud.google.com/vertex-ai/docs/predictions/deploy-model-api)\n",
35 | "* [MLOPs with Vertex AI](https://github.com/GoogleCloudPlatform/mlops-with-vertex-ai)\n",
36 | "* [Custom components TFX](https://www.tensorflow.org/tfx/tutorials/tfx/python_function_component)"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {
42 | "id": "O9aBRdubPFsU"
43 | },
44 | "source": [
45 | "## Prerequisites\n",
46 | "- Enable Vertex AI API\n",
47 | "- Add the following roles to IAM\n",
48 | " - Vertex AI Custom Code Service Agent\n",
49 | " - Vertex AI Service Agent\n",
50 | " - Vertex AI User\n",
51 | " - Artifact Registry Service Agent\n",
52 | " - Container Registry Service Agent"
53 | ]
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {
58 | "id": "D04aKMGWXjOu"
59 | },
60 | "source": [
61 | "## Setup"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 1,
67 | "metadata": {
68 | "id": "I_niUhp_TY1G"
69 | },
70 | "outputs": [],
71 | "source": [
72 | "# Install pinned TFX and KFP versions plus the latest Vertex AI SDK.\n",
73 | "%%capture\n",
74 | "!pip install --upgrade tfx==1.2.0 kfp==1.6.1\n",
75 | "!pip install -q --upgrade google-cloud-aiplatform"
76 | ]
77 | },
78 | {
79 | "cell_type": "markdown",
80 | "metadata": {
81 | "id": "ZVmgQ6w1oT_Z"
82 | },
83 | "source": [
84 | "### ***Please restart runtime before continuing.*** "
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": null,
90 | "metadata": {
91 | "id": "mstgsNHWoiXk"
92 | },
93 | "outputs": [],
94 | "source": [
95 | "!gcloud init"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 2,
101 | "metadata": {
102 | "id": "Pl8ewjX3oXRx"
103 | },
104 | "outputs": [],
105 | "source": [
106 | "from google.colab import auth\n",
107 | "auth.authenticate_user()"
108 | ]
109 | },
110 | {
111 | "cell_type": "markdown",
112 | "metadata": {
113 | "id": "zqVWpmywXngD"
114 | },
115 | "source": [
116 | "## Imports"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": 3,
122 | "metadata": {
123 | "colab": {
124 | "base_uri": "https://localhost:8080/"
125 | },
126 | "id": "wptXF0e-UXsT",
127 | "outputId": "3228fd0e-aac7-454d-dcfa-ddcf3f74ee12"
128 | },
129 | "outputs": [
130 | {
131 | "name": "stdout",
132 | "output_type": "stream",
133 | "text": [
134 | "TensorFlow version: 2.5.1\n",
135 | "TFX version: 1.2.0\n",
136 | "KFP version: 1.6.1\n"
137 | ]
138 | }
139 | ],
140 | "source": [
141 | "import tensorflow as tf\n",
142 | "\n",
143 | "print(\"TensorFlow version: {}\".format(tf.__version__))\n",
144 | "from tfx import v1 as tfx\n",
145 | "\n",
146 | "print(\"TFX version: {}\".format(tfx.__version__))\n",
147 | "import kfp\n",
148 | "\n",
149 | "print(\"KFP version: {}\".format(kfp.__version__))\n",
150 | "\n",
151 | "from google.cloud import aiplatform as vertex_ai\n",
152 | "import os"
153 | ]
154 | },
155 | {
156 | "cell_type": "markdown",
157 | "metadata": {
158 | "id": "hFYHeepnXxpZ"
159 | },
160 | "source": [
161 | "## Environment setup"
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": 4,
167 | "metadata": {
168 | "id": "zPVyBrXrW-vu"
169 | },
170 | "outputs": [],
171 | "source": [
172 | "GOOGLE_CLOUD_PROJECT = \"gcp-ml-172005\" # @param {type:\"string\"}\n",
173 | "GOOGLE_CLOUD_REGION = \"us-central1\" # @param {type:\"string\"}\n",
174 | "GCS_BUCKET_NAME = \"cifar10-experimental-csp2\" # @param {type:\"string\"}\n",
175 | "DATA_ROOT = \"gs://cifar10-csp-public2\" # @param {type:\"string\"}\n",
176 | "\n",
177 | "if not (GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_REGION and GCS_BUCKET_NAME):\n",
178 | " from absl import logging\n",
179 | "\n",
180 | " logging.error(\"Please set all required parameters.\")"
181 | ]
182 | },
183 | {
184 | "cell_type": "markdown",
185 | "metadata": {
186 | "id": "CV-BZSvQq7YY"
187 | },
188 | "source": [
189 | "The location of the bucket must be a single region. Also, the bucket needs to be created in a region where [Vertex AI services are available](https://cloud.google.com/vertex-ai/docs/general/locations#available_regions). "
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": 5,
195 | "metadata": {
196 | "colab": {
197 | "base_uri": "https://localhost:8080/"
198 | },
199 | "id": "J65KHrt4X-Fu",
200 | "outputId": "ab76e6a9-dc20-41df-e42b-bc516ee67e7f"
201 | },
202 | "outputs": [
203 | {
204 | "name": "stdout",
205 | "output_type": "stream",
206 | "text": [
207 | "PIPELINE_ROOT: gs://cifar10-experimental-csp2/pipeline_root/continuous-adaptation-for-data-changes\n"
208 | ]
209 | }
210 | ],
211 | "source": [
212 | "PIPELINE_NAME = \"continuous-adaptation-for-data-changes\"\n",
213 | "\n",
214 | "# Path to various pipeline artifacts.\n",
215 | "PIPELINE_ROOT = \"gs://{}/pipeline_root/{}\".format(GCS_BUCKET_NAME, PIPELINE_NAME)\n",
216 | "\n",
217 | "# Paths for users' Python module.\n",
218 | "MODULE_ROOT = \"gs://{}/pipeline_module/{}\".format(GCS_BUCKET_NAME, PIPELINE_NAME)\n",
219 | "\n",
220 | "# This is the path where your model will be pushed for serving.\n",
221 | "SERVING_MODEL_DIR = \"gs://{}/serving_model/{}\".format(GCS_BUCKET_NAME, PIPELINE_NAME)\n",
222 | "\n",
223 | "print(\"PIPELINE_ROOT: {}\".format(PIPELINE_ROOT))"
224 | ]
225 | },
226 | {
227 | "cell_type": "markdown",
228 | "metadata": {
229 | "id": "kQVpzyftX0y0"
230 | },
231 | "source": [
232 | "## Create training modules"
233 | ]
234 | },
235 | {
236 | "cell_type": "code",
237 | "execution_count": 6,
238 | "metadata": {
239 | "id": "zFgnx3uGAfuj"
240 | },
241 | "outputs": [],
242 | "source": [
243 | "_trainer_module_file = 'trainer.py'"
244 | ]
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": 7,
249 | "metadata": {
250 | "colab": {
251 | "base_uri": "https://localhost:8080/"
252 | },
253 | "id": "sZqzotkfAf-C",
254 | "outputId": "ec319d61-dbbb-4095-f2d2-0d2704d872ba"
255 | },
256 | "outputs": [
257 | {
258 | "name": "stdout",
259 | "output_type": "stream",
260 | "text": [
261 | "Writing trainer.py\n"
262 | ]
263 | }
264 | ],
265 | "source": [
266 | "%%writefile {_trainer_module_file}\n",
267 | "\n",
268 | "from typing import List\n",
269 | "from absl import logging\n",
270 | "from tensorflow import keras\n",
271 | "from tfx import v1 as tfx\n",
272 | "import tensorflow as tf\n",
273 | "\n",
274 | "_IMAGE_FEATURES = {\n",
275 | " \"image\": tf.io.FixedLenFeature([], tf.string),\n",
276 | " \"label\": tf.io.FixedLenFeature([], tf.int64),\n",
277 | "}\n",
278 | "\n",
279 | "_CONCRETE_INPUT = \"numpy_inputs\"\n",
280 | "_TRAIN_BATCH_SIZE = 64\n",
281 | "_EVAL_BATCH_SIZE = 64\n",
282 | "_INPUT_SHAPE = (32, 32, 3)\n",
283 | "_EPOCHS = 2\n",
284 | "\n",
285 | "\n",
286 | "def _parse_fn(example):\n",
287 | " example = tf.io.parse_single_example(example, _IMAGE_FEATURES)\n",
288 | " image = tf.image.decode_jpeg(example[\"image\"], channels=3)\n",
289 | " class_label = tf.cast(example[\"label\"], tf.int32)\n",
290 | " return image, class_label\n",
291 | "\n",
292 | "\n",
293 | "def _input_fn(file_pattern: List[str], batch_size: int) -> tf.data.Dataset:\n",
294 | " print(f\"Reading data from: {file_pattern}\")\n",
295 | " tfrecord_filenames = tf.io.gfile.glob(file_pattern[0] + \".gz\")\n",
296 | " print(tfrecord_filenames)\n",
297 | " dataset = tf.data.TFRecordDataset(tfrecord_filenames, compression_type=\"GZIP\")\n",
298 | " dataset = dataset.map(_parse_fn).batch(batch_size)\n",
299 | " return dataset.repeat()\n",
300 | "\n",
301 | "\n",
302 | "def _make_keras_model() -> tf.keras.Model:\n",
303 | " \"\"\"Creates a ResNet50-based model for classifying CIFAR-10 images.\n",
304 | "\n",
305 | " Returns:\n",
306 | " A Keras Model.\n",
307 | " \"\"\"\n",
308 | " inputs = keras.Input(shape=_INPUT_SHAPE)\n",
309 | " base_model = keras.applications.ResNet50(\n",
310 | " include_top=False, input_shape=_INPUT_SHAPE, pooling=\"avg\"\n",
311 | " )\n",
312 | " base_model.trainable = False\n",
313 | " x = tf.keras.applications.resnet.preprocess_input(inputs)\n",
314 | " x = base_model(\n",
315 | " x, training=False\n",
316 | " ) # Ensures BatchNorm runs in inference mode in this model\n",
317 | " outputs = keras.layers.Dense(10, activation=\"softmax\")(x)\n",
318 | " model = keras.Model(inputs, outputs)\n",
319 | "\n",
320 | " model.compile(\n",
321 | " optimizer=keras.optimizers.Adam(),\n",
322 | " loss=tf.keras.losses.SparseCategoricalCrossentropy(),\n",
323 | " metrics=[keras.metrics.SparseCategoricalAccuracy()],\n",
324 | " )\n",
325 | "\n",
326 | " model.summary(print_fn=logging.info)\n",
327 | " return model\n",
328 | "\n",
329 | "\n",
330 | "def _preprocess(bytes_input):\n",
331 | " decoded = tf.io.decode_jpeg(bytes_input, channels=3)\n",
332 | " resized = tf.image.resize(decoded, size=(32, 32))\n",
333 | " return resized\n",
334 | "\n",
335 | "\n",
336 | "@tf.function(input_signature=[tf.TensorSpec([None], tf.string)])\n",
337 | "def preprocess_fn(bytes_inputs):\n",
338 | " decoded_images = tf.map_fn(\n",
339 | " _preprocess, bytes_inputs, dtype=tf.float32, back_prop=False\n",
340 | " )\n",
341 | " return {_CONCRETE_INPUT: decoded_images}\n",
342 | "\n",
343 | "\n",
344 | "def _model_exporter(model: tf.keras.Model):\n",
345 | " m_call = tf.function(model.call).get_concrete_function(\n",
346 | " [tf.TensorSpec(shape=[None, 32, 32, 3], dtype=tf.float32, name=_CONCRETE_INPUT)]\n",
347 | " )\n",
348 | "\n",
349 | " @tf.function(input_signature=[tf.TensorSpec([None], tf.string)])\n",
350 | " def serving_fn(bytes_inputs):\n",
351 | " # This function comes from the Computer Vision book from O'Reilly.\n",
352 | " labels = tf.constant(\n",
353 | " [\n",
354 | " \"airplane\",\n",
355 | " \"automobile\",\n",
356 | " \"bird\",\n",
357 | " \"cat\",\n",
358 | " \"deer\",\n",
359 | " \"dog\",\n",
360 | " \"frog\",\n",
361 | " \"horse\",\n",
362 | " \"ship\",\n",
363 | " \"truck\",\n",
364 | " ],\n",
365 | " dtype=tf.string,\n",
366 | " )\n",
367 | " images = preprocess_fn(bytes_inputs)\n",
368 | "\n",
369 | " probs = m_call(**images)\n",
370 | " indices = tf.argmax(probs, axis=1)\n",
371 | " pred_source = tf.gather(params=labels, indices=indices)\n",
372 | " pred_confidence = tf.reduce_max(probs, axis=1)\n",
373 | " return {\"label\": pred_source, \"confidence\": pred_confidence}\n",
374 | "\n",
375 | " return serving_fn\n",
376 | "\n",
377 | "\n",
378 | "def run_fn(fn_args: tfx.components.FnArgs):\n",
379 | " print(fn_args)\n",
380 | "\n",
381 | " train_dataset = _input_fn(fn_args.train_files, batch_size=_TRAIN_BATCH_SIZE)\n",
382 | " eval_dataset = _input_fn(fn_args.eval_files, batch_size=_EVAL_BATCH_SIZE)\n",
383 | "\n",
384 | " model = _make_keras_model()\n",
385 | " model.fit(\n",
386 | " train_dataset,\n",
387 | " steps_per_epoch=fn_args.train_steps,\n",
388 | " validation_data=eval_dataset,\n",
389 | " validation_steps=fn_args.eval_steps,\n",
390 | " epochs=_EPOCHS,\n",
391 | " )\n",
392 | "\n",
393 | " _, acc = model.evaluate(eval_dataset, steps=fn_args.eval_steps)\n",
394 | " logging.info(f\"Validation accuracy: {round(acc * 100, 2)}%\")\n",
395 | " # The result of the training should be saved in `fn_args.serving_model_dir`\n",
396 | " # directory.\n",
397 | " tf.saved_model.save(\n",
398 | " model,\n",
399 | " fn_args.serving_model_dir,\n",
400 | " signatures={\"serving_default\": _model_exporter(model)},\n",
401 | " )"
402 | ]
403 | },
404 | {
405 | "cell_type": "code",
406 | "execution_count": 8,
407 | "metadata": {
408 | "colab": {
409 | "base_uri": "https://localhost:8080/"
410 | },
411 | "id": "DEPD_70MLf9b",
412 | "outputId": "36120020-9a1b-4e57-d39c-7d3b93697a2c"
413 | },
414 | "outputs": [
415 | {
416 | "name": "stdout",
417 | "output_type": "stream",
418 | "text": [
419 | "Copying file://trainer.py [Content-Type=text/x-python]...\n",
420 | "/ [1 files][ 3.8 KiB/ 3.8 KiB] \n",
421 | "Operation completed over 1 objects/3.8 KiB. \n",
422 | " 3.8 KiB 2021-10-16T14:22:00Z gs://cifar10-experimental-csp2/pipeline_module/continuous-adaptation-for-data-changes/trainer.py\n",
423 | "TOTAL: 1 objects, 3890 bytes (3.8 KiB)\n"
424 | ]
425 | }
426 | ],
427 | "source": [
428 | "!gsutil cp {_trainer_module_file} {MODULE_ROOT}/\n",
429 | "!gsutil ls -lh {MODULE_ROOT}/"
430 | ]
431 | },
432 | {
433 | "cell_type": "code",
434 | "execution_count": 9,
435 | "metadata": {
436 | "colab": {
437 | "base_uri": "https://localhost:8080/",
438 | "height": 35
439 | },
440 | "id": "uKK1LHdaNIJc",
441 | "outputId": "74130cba-63c5-474f-c915-6ab3c355a3f3"
442 | },
443 | "outputs": [
444 | {
445 | "data": {
446 | "application/vnd.google.colaboratory.intrinsic+json": {
447 | "type": "string"
448 | },
449 | "text/plain": [
450 | "'gs://cifar10-experimental-csp2/pipeline_module/continuous-adaptation-for-data-changes/trainer.py'"
451 | ]
452 | },
453 | "execution_count": 9,
454 | "metadata": {},
455 | "output_type": "execute_result"
456 | }
457 | ],
458 | "source": [
459 | "os.path.join(MODULE_ROOT, _trainer_module_file)"
460 | ]
461 | },
462 | {
463 | "cell_type": "markdown",
464 | "metadata": {
465 | "id": "-WTt_mw3cnia"
466 | },
467 | "source": [
468 | "## Custom Vertex Components \n",
469 | "- basically cloned from [Dual Deployment Project]()"
470 | ]
471 | },
472 | {
473 | "cell_type": "code",
474 | "execution_count": 10,
475 | "metadata": {
476 | "id": "T7XUOglgctyb"
477 | },
478 | "outputs": [],
479 | "source": [
480 | "_vertex_uploader_module_file = \"vertex_uploader.py\"\n",
481 | "_vertex_deployer_module_file = \"vertex_deployer.py\""
482 | ]
483 | },
484 | {
485 | "cell_type": "code",
486 | "execution_count": 11,
487 | "metadata": {
488 | "colab": {
489 | "base_uri": "https://localhost:8080/"
490 | },
491 | "id": "qnOtYLm6cviP",
492 | "outputId": "5334b065-a124-47ab-91d3-bbff4f470396"
493 | },
494 | "outputs": [
495 | {
496 | "name": "stdout",
497 | "output_type": "stream",
498 | "text": [
499 | "Writing vertex_uploader.py\n"
500 | ]
501 | }
502 | ],
503 | "source": [
504 | "%%writefile {_vertex_uploader_module_file}\n",
505 | "\n",
506 | "import os\n",
507 | "import tensorflow as tf\n",
508 | "\n",
509 | "from tfx.dsl.component.experimental.decorators import component\n",
510 | "from tfx.dsl.component.experimental.annotations import Parameter\n",
511 | "from tfx.types.standard_artifacts import String\n",
512 | "from google.cloud import aiplatform as vertex_ai\n",
513 | "from tfx import v1 as tfx\n",
514 | "from absl import logging\n",
515 | "\n",
516 | "\n",
517 | "@component\n",
518 | "def VertexUploader(\n",
519 | " project: Parameter[str],\n",
520 | " region: Parameter[str],\n",
521 | " model_display_name: Parameter[str],\n",
522 | " pushed_model_location: Parameter[str],\n",
523 | " serving_image_uri: Parameter[str],\n",
524 | " uploaded_model: tfx.dsl.components.OutputArtifact[String],\n",
525 | "):\n",
526 | "\n",
527 | " vertex_ai.init(project=project, location=region)\n",
528 | "\n",
529 | " pushed_model_dir = os.path.join(\n",
530 | " pushed_model_location, tf.io.gfile.listdir(pushed_model_location)[-1]\n",
531 | " )\n",
532 | "\n",
533 | " logging.info(f\"Model registry location: {pushed_model_dir}\")\n",
534 | "\n",
535 | " vertex_model = vertex_ai.Model.upload(\n",
536 | " display_name=model_display_name,\n",
537 | " artifact_uri=pushed_model_dir,\n",
538 | " serving_container_image_uri=serving_image_uri,\n",
539 | " parameters_schema_uri=None,\n",
540 | " instance_schema_uri=None,\n",
541 | " explanation_metadata=None,\n",
542 | " explanation_parameters=None,\n",
543 | " )\n",
544 | "\n",
545 | " uploaded_model.set_string_custom_property(\n",
546 | " \"model_resource_name\", str(vertex_model.resource_name)\n",
547 | " )\n",
548 | " logging.info(f\"Model resource: {str(vertex_model.resource_name)}\")"
549 | ]
550 | },
551 | {
552 | "cell_type": "code",
553 | "execution_count": 12,
554 | "metadata": {
555 | "colab": {
556 | "base_uri": "https://localhost:8080/"
557 | },
558 | "id": "5nUaRKSJczio",
559 | "outputId": "b62b81ae-cc2a-4551-d5a4-751b41bc89bc"
560 | },
561 | "outputs": [
562 | {
563 | "name": "stdout",
564 | "output_type": "stream",
565 | "text": [
566 | "Writing vertex_deployer.py\n"
567 | ]
568 | }
569 | ],
570 | "source": [
571 | "%%writefile {_vertex_deployer_module_file}\n",
572 | "\n",
573 | "from tfx.dsl.component.experimental.decorators import component\n",
574 | "from tfx.dsl.component.experimental.annotations import Parameter\n",
575 | "from tfx.types.standard_artifacts import String\n",
576 | "from google.cloud import aiplatform as vertex_ai\n",
577 | "from tfx import v1 as tfx\n",
578 | "from absl import logging\n",
579 | "\n",
580 | "\n",
581 | "@component\n",
582 | "def VertexDeployer(\n",
583 | " project: Parameter[str],\n",
584 | " region: Parameter[str],\n",
585 | " model_display_name: Parameter[str],\n",
586 | " deployed_model_display_name: Parameter[str],\n",
587 | "):\n",
588 | "\n",
589 | " logging.info(f\"Endpoint display: {deployed_model_display_name}\")\n",
590 | " vertex_ai.init(project=project, location=region)\n",
591 | "\n",
592 | " endpoints = vertex_ai.Endpoint.list(\n",
593 | " filter=f\"display_name={deployed_model_display_name}\", order_by=\"update_time\"\n",
594 | " )\n",
595 | "\n",
596 | " if len(endpoints) > 0:\n",
597 | " logging.info(f\"Endpoint {deployed_model_display_name} already exists.\")\n",
598 | " endpoint = endpoints[-1]\n",
599 | " else:\n",
600 | " endpoint = vertex_ai.Endpoint.create(deployed_model_display_name)\n",
601 | "\n",
602 | " model = vertex_ai.Model.list(\n",
603 | " filter=f\"display_name={model_display_name}\", order_by=\"update_time\"\n",
604 | " )[-1]\n",
605 | "\n",
606 | " endpoint = vertex_ai.Endpoint.list(\n",
607 | " filter=f\"display_name={deployed_model_display_name}\", order_by=\"update_time\"\n",
608 | " )[-1]\n",
609 | "\n",
610 | " deployed_model = endpoint.deploy(\n",
611 | " model=model,\n",
612 | " # Syntax from here: https://git.io/JBQDP\n",
613 | " traffic_split={\"0\": 100},\n",
614 | " machine_type=\"n1-standard-4\",\n",
615 | " min_replica_count=1,\n",
616 | " max_replica_count=1,\n",
617 | " )\n",
618 | "\n",
619 | " logging.info(f\"Model deployed to: {deployed_model}\")"
620 | ]
621 | },
622 | {
623 | "cell_type": "code",
624 | "execution_count": 13,
625 | "metadata": {
626 | "id": "QyR80VnedA9Y"
627 | },
628 | "outputs": [],
629 | "source": [
630 | "!mkdir -p ./custom_components\n",
631 | "!touch ./custom_components/__init__.py\n",
632 | "!cp -r {_vertex_uploader_module_file} {_vertex_deployer_module_file} custom_components"
633 | ]
634 | },
635 | {
636 | "cell_type": "code",
637 | "execution_count": 14,
638 | "metadata": {
639 | "colab": {
640 | "base_uri": "https://localhost:8080/"
641 | },
642 | "id": "DLXV-aRodEmH",
643 | "outputId": "e23b1a28-a329-470e-9d1b-89ac2fab820b"
644 | },
645 | "outputs": [
646 | {
647 | "name": "stdout",
648 | "output_type": "stream",
649 | "text": [
650 | "total 8.0K\n",
651 | "-rw-r--r-- 1 root root 0 Oct 16 14:22 __init__.py\n",
652 | "-rw-r--r-- 1 root root 1.5K Oct 16 14:22 vertex_deployer.py\n",
653 | "-rw-r--r-- 1 root root 1.4K Oct 16 14:22 vertex_uploader.py\n"
654 | ]
655 | }
656 | ],
657 | "source": [
658 | "!ls -lh custom_components"
659 | ]
660 | },
661 | {
662 | "cell_type": "code",
663 | "execution_count": 15,
664 | "metadata": {
665 | "colab": {
666 | "base_uri": "https://localhost:8080/"
667 | },
668 | "id": "5-un8Vj1dGoL",
669 | "outputId": "56471ff8-29a0-49b7-fdd3-137a071f6613"
670 | },
671 | "outputs": [
672 | {
673 | "name": "stdout",
674 | "output_type": "stream",
675 | "text": [
676 | "URI of the custom image: gcr.io/gcp-ml-172005/cifar10:tfx-1-2-0\n"
677 | ]
678 | }
679 | ],
680 | "source": [
681 | "DATASET_DISPLAY_NAME = \"cifar10\"\n",
682 | "VERSION = \"tfx-1-2-0\"\n",
683 | "TFX_IMAGE_URI = f\"gcr.io/{GOOGLE_CLOUD_PROJECT}/{DATASET_DISPLAY_NAME}:{VERSION}\"\n",
684 | "print(f\"URI of the custom image: {TFX_IMAGE_URI}\")"
685 | ]
686 | },
687 | {
688 | "cell_type": "code",
689 | "execution_count": null,
690 | "metadata": {
691 | "colab": {
692 | "base_uri": "https://localhost:8080/"
693 | },
694 | "id": "95lKF_6QdQ4o",
695 | "outputId": "5e4b8876-00f3-4045-c680-c9ed5f35dcd7"
696 | },
697 | "outputs": [
698 | {
699 | "name": "stdout",
700 | "output_type": "stream",
701 | "text": [
702 | "Writing Dockerfile\n"
703 | ]
704 | }
705 | ],
706 | "source": [
707 | "%%writefile Dockerfile\n",
708 | "\n",
709 | "FROM gcr.io/tfx-oss-public/tfx:1.2.0\n",
710 | "RUN mkdir -p custom_components\n",
711 | "COPY custom_components/* ./custom_components/\n",
712 | "RUN pip install --upgrade google-cloud-aiplatform"
713 | ]
714 | },
715 | {
716 | "cell_type": "code",
717 | "execution_count": null,
718 | "metadata": {
719 | "id": "Tc_K3jVCdXE8"
720 | },
721 | "outputs": [],
722 | "source": [
723 | "!gcloud builds submit --tag $TFX_IMAGE_URI . --timeout=15m --machine-type=e2-highcpu-8"
724 | ]
725 | },
726 | {
727 | "cell_type": "markdown",
728 | "metadata": {
729 | "id": "zGJU5sXrrAJW"
730 | },
731 | "source": [
732 | "# Pipeline"
733 | ]
734 | },
735 | {
736 | "cell_type": "code",
737 | "execution_count": 16,
738 | "metadata": {
739 | "id": "sEbNM9CeERX2"
740 | },
741 | "outputs": [],
742 | "source": [
743 | "# Specify training worker configurations. To minimize costs we can even specify two\n",
744 | "# different configurations: a beefier machine for the Endpoint model and slightly less\n",
745 | "# powerful machine for the mobile model.\n",
746 | "TRAINING_JOB_SPEC = {\n",
747 | " \"project\": GOOGLE_CLOUD_PROJECT,\n",
748 | " \"worker_pool_specs\": [\n",
749 | " {\n",
750 | " \"machine_spec\": {\n",
751 | " \"machine_type\": \"n1-standard-4\",\n",
752 | " \"accelerator_type\": \"NVIDIA_TESLA_K80\",\n",
753 | " \"accelerator_count\": 1,\n",
754 | " },\n",
755 | " \"replica_count\": 1,\n",
756 | " \"container_spec\": {\n",
757 | " \"image_uri\": \"gcr.io/tfx-oss-public/tfx:{}\".format(tfx.__version__),\n",
758 | " },\n",
759 | " }\n",
760 | " ],\n",
761 | "}"
762 | ]
763 | },
764 | {
765 | "cell_type": "code",
766 | "execution_count": 17,
767 | "metadata": {
768 | "id": "Ivc6LzpVuzKb"
769 | },
770 | "outputs": [],
771 | "source": [
772 | "SERVING_JOB_SPEC = {\n",
773 | " \"endpoint_name\": PIPELINE_NAME.replace(\"-\", \"_\"), # '-' is not allowed.\n",
774 | " \"project_id\": GOOGLE_CLOUD_PROJECT,\n",
775 | " \"min_replica_count\": 1,\n",
776 | " \"max_replica_count\": 1,\n",
777 | " \"machine_type\": \"n1-standard-2\",\n",
778 | "}"
779 | ]
780 | },
781 | {
782 | "cell_type": "code",
783 | "execution_count": 18,
784 | "metadata": {
785 | "id": "lOteqi0td5Vu"
786 | },
787 | "outputs": [],
788 | "source": [
789 | "from datetime import datetime\n",
790 | "\n",
791 | "TIMESTAMP = datetime.now().strftime(\"%Y%m%d%H%M%S\")"
792 | ]
793 | },
794 | {
795 | "cell_type": "code",
796 | "execution_count": 19,
797 | "metadata": {
798 | "id": "EXtVu_w6Achq"
799 | },
800 | "outputs": [],
801 | "source": [
802 | "import tfx"
803 | ]
804 | },
805 | {
806 | "cell_type": "markdown",
807 | "metadata": {
808 | "id": "qITdwKUKRZUg"
809 | },
810 | "source": [
811 | "Spanning feature is currently not working in TFX version <= 1.3.0. It will be fixed in the next release. But for now, as a workaround, this notebook uses `utils.calculate_splits_fingerprint_span_and_version` function manually. Also note that `RuntimeParameter` can't be set within `utils.calculate_splits_fingerprint_span_and_version`, so it is not possible to select the range of spans dynamically during the runtime. \n",
812 | "\n",
813 | "When the new release of TFX comes out, this part will be fixed."
814 | ]
815 | },
816 | {
817 | "cell_type": "code",
818 | "execution_count": 42,
819 | "metadata": {
820 | "id": "ln1cvbcfphA9"
821 | },
822 | "outputs": [],
823 | "source": [
824 | "from tfx.orchestration import data_types\n",
825 | "\n",
826 | "from tfx import v1 as tfx\n",
827 | "from tfx.proto import example_gen_pb2, range_config_pb2\n",
828 | "from tfx.components.example_gen import utils\n",
829 | "\n",
830 | "from custom_components.vertex_uploader import VertexUploader\n",
831 | "from custom_components.vertex_deployer import VertexDeployer\n",
832 | "\n",
833 | "\n",
834 | "def _create_pipeline(\n",
835 | " input_config: data_types.RuntimeParameter,\n",
836 | " output_config: data_types.RuntimeParameter,\n",
837 | " pipeline_name: str,\n",
838 | " pipeline_root: str,\n",
839 | " data_root: str,\n",
840 | " serving_model_dir: str,\n",
841 | " trainer_module: str,\n",
842 | " project_id: str,\n",
843 | " region: str,\n",
844 | ") -> tfx.dsl.Pipeline:\n",
845 | " \"\"\"Creates a three component flowers pipeline with TFX.\"\"\"\n",
846 | " example_gen = tfx.components.ImportExampleGen(\n",
847 | " input_base=data_root, input_config=input_config, output_config=output_config\n",
848 | " )\n",
849 | "\n",
850 | " # Trainer\n",
851 | " trainer = tfx.extensions.google_cloud_ai_platform.Trainer(\n",
852 | " module_file=trainer_module,\n",
853 | " examples=example_gen.outputs[\"examples\"],\n",
854 | " train_args=tfx.proto.TrainArgs(splits=[\"train\"], num_steps=50000 // 64),\n",
855 | " eval_args=tfx.proto.EvalArgs(splits=[\"val\"], num_steps=10000 // 64),\n",
856 | " custom_config={\n",
857 | " tfx.extensions.google_cloud_ai_platform.ENABLE_VERTEX_KEY: True,\n",
858 | " tfx.extensions.google_cloud_ai_platform.VERTEX_REGION_KEY: region,\n",
859 | " tfx.extensions.google_cloud_ai_platform.TRAINING_ARGS_KEY: TRAINING_JOB_SPEC,\n",
860 | " \"use_gpu\": True,\n",
861 | " },\n",
862 | " ).with_id(\"trainer\")\n",
863 | "\n",
864 | " # Pushes the model to a filesystem destination.\n",
865 | " pushed_model_location = os.path.join(serving_model_dir, \"resnet50\")\n",
866 | " resnet_pusher = tfx.components.Pusher(\n",
867 | " model=trainer.outputs[\"model\"],\n",
868 | " push_destination=tfx.proto.PushDestination(\n",
869 | " filesystem=tfx.proto.PushDestination.Filesystem(\n",
870 | " base_directory=pushed_model_location\n",
871 | " )\n",
872 | " ),\n",
873 | " ).with_id(\"resnet_pusher\")\n",
874 | "\n",
875 | " # Vertex AI upload.\n",
876 | " model_display_name = \"resnet_cifar_latest\"\n",
877 | " uploader = VertexUploader(\n",
878 | " project=project_id,\n",
879 | " region=region,\n",
880 | " model_display_name=model_display_name,\n",
881 | " pushed_model_location=pushed_model_location,\n",
882 | " serving_image_uri=\"us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-5:latest\",\n",
883 | " ).with_id(\"vertex_uploader\")\n",
884 | " uploader.add_upstream_node(resnet_pusher)\n",
885 | "\n",
886 | " # Create an endpoint.\n",
887 | " deployer = VertexDeployer(\n",
888 | " project=project_id,\n",
889 | " region=region,\n",
890 | " model_display_name=model_display_name,\n",
891 | " deployed_model_display_name=model_display_name + \"_\" + TIMESTAMP,\n",
892 | " ).with_id(\"vertex_deployer\")\n",
893 | " deployer.add_upstream_node(uploader)\n",
894 | "\n",
895 | " components = [\n",
896 | " example_gen,\n",
897 | " trainer,\n",
898 | " resnet_pusher,\n",
899 | " uploader,\n",
900 | " deployer,\n",
901 | " ]\n",
902 | "\n",
903 | " return tfx.dsl.Pipeline(\n",
904 | " pipeline_name=pipeline_name,\n",
905 | " pipeline_root=pipeline_root,\n",
906 | " components=components,\n",
907 | " enable_cache=True,\n",
908 | " )"
909 | ]
910 | },
911 | {
912 | "cell_type": "markdown",
913 | "metadata": {
914 | "id": "IFdlslfOX54z"
915 | },
916 | "source": [
917 | "## Compile the pipeline"
918 | ]
919 | },
920 | {
921 | "cell_type": "code",
922 | "execution_count": 43,
923 | "metadata": {
924 | "id": "-AY5Z2tbsbwE"
925 | },
926 | "outputs": [],
927 | "source": [
928 | "import os\n",
929 | "\n",
930 | "PIPELINE_DEFINITION_FILE = PIPELINE_NAME + \"_pipeline.json\"\n",
931 | "\n",
932 | "# Important: We need to pass the custom Docker image URI to the\n",
933 | "# `KubeflowV2DagRunnerConfig` to take effect.\n",
934 | "runner = tfx.orchestration.experimental.KubeflowV2DagRunner(\n",
935 | " config=tfx.orchestration.experimental.KubeflowV2DagRunnerConfig(\n",
936 | " default_image=TFX_IMAGE_URI\n",
937 | " ),\n",
938 | " output_filename=PIPELINE_DEFINITION_FILE,\n",
939 | ")\n",
940 | "\n",
941 | "_ = runner.run(\n",
942 | " _create_pipeline(\n",
943 | " input_config=tfx.dsl.experimental.RuntimeParameter(\n",
944 | " name=\"input-config\",\n",
945 | " default='{\"input_config\": {\"splits\": [{\"name\":\"train\", \"pattern\":\"span-1/train/tfrecord\"}, {\"name\":\"val\", \"pattern\":\"span-1/test/tfrecord\"}]}}',\n",
946 | " ptype=str,\n",
947 | " ),\n",
948 | " output_config=tfx.dsl.experimental.RuntimeParameter(\n",
949 | " name=\"output-config\", default=\"{}\", ptype=str,\n",
950 | " ),\n",
951 | " pipeline_name=PIPELINE_NAME,\n",
952 | " pipeline_root=PIPELINE_ROOT,\n",
953 | " data_root=DATA_ROOT,\n",
954 | " serving_model_dir=SERVING_MODEL_DIR,\n",
955 | " trainer_module=os.path.join(MODULE_ROOT, _trainer_module_file),\n",
956 | " project_id=GOOGLE_CLOUD_PROJECT,\n",
957 | " region=GOOGLE_CLOUD_REGION,\n",
958 | " )\n",
959 | ")"
960 | ]
961 | },
962 | {
963 | "cell_type": "code",
964 | "execution_count": 48,
965 | "metadata": {
966 | "colab": {
967 | "base_uri": "https://localhost:8080/"
968 | },
969 | "id": "oCSQ98YN-F6v",
970 | "outputId": "d82ffbb8-7320-420d-9983-2f3b7476f075"
971 | },
972 | "outputs": [
973 | {
974 | "name": "stdout",
975 | "output_type": "stream",
976 | "text": [
977 | "Copying file://continuous-adaptation-for-data-changes_pipeline.json [Content-Type=application/json]...\n",
978 | "/ [1 files][ 8.7 KiB/ 8.7 KiB] \n",
979 | "Operation completed over 1 objects/8.7 KiB. \n",
980 | " 8.69 KiB 2021-10-16T15:51:22Z gs://cifar10-experimental-csp2/pipeline_root/continuous-adaptation-for-data-changes/continuous-adaptation-for-data-changes_pipeline.json\n",
981 | " gs://cifar10-experimental-csp2/pipeline_root/continuous-adaptation-for-data-changes/874401645461/\n",
982 | "TOTAL: 1 objects, 8896 bytes (8.69 KiB)\n"
983 | ]
984 | }
985 | ],
986 | "source": [
987 | "!gsutil cp {PIPELINE_DEFINITION_FILE} {PIPELINE_ROOT}/\n",
988 | "!gsutil ls -lh {PIPELINE_ROOT}/"
989 | ]
990 | },
991 | {
992 | "cell_type": "markdown",
993 | "metadata": {
994 | "id": "ocHBJaR_X7x2"
995 | },
996 | "source": [
997 | "## Submit the pipeline for execution to Vertex AI\n",
998 | "\n",
999 | "Generally, it's a good idea to first do a local run of the end-to-end pipeline before submitting it to an online orchestrator. We can use `tfx.orchestration.LocalDagRunner()` for that, but for the purposes of this notebook we won't be doing that. "
1000 | ]
1001 | },
1002 | {
1003 | "cell_type": "code",
1004 | "execution_count": 27,
1005 | "metadata": {
1006 | "colab": {
1007 | "base_uri": "https://localhost:8080/"
1008 | },
1009 | "id": "3elrtDOus83z",
1010 | "outputId": "cb8af6dc-ed37-447a-e8a6-5aefed30211e"
1011 | },
1012 | "outputs": [
1013 | {
1014 | "name": "stderr",
1015 | "output_type": "stream",
1016 | "text": [
1017 | "WARNING:google.auth._default:No project ID could be determined. Consider running `gcloud config set project` or setting the GOOGLE_CLOUD_PROJECT environment variable\n"
1018 | ]
1019 | }
1020 | ],
1021 | "source": [
1022 | "from kfp.v2.google import client\n",
1023 | "\n",
1024 | "pipelines_client = client.AIPlatformClient(\n",
1025 | " project_id=GOOGLE_CLOUD_PROJECT, region=GOOGLE_CLOUD_REGION,\n",
1026 | ")"
1027 | ]
1028 | },
1029 | {
1030 | "cell_type": "code",
1031 | "execution_count": 47,
1032 | "metadata": {
1033 | "colab": {
1034 | "base_uri": "https://localhost:8080/",
1035 | "height": 34
1036 | },
1037 | "id": "TiSaBREqfa86",
1038 | "outputId": "9b0addc0-9677-4202-9ffb-128485972bc1"
1039 | },
1040 | "outputs": [
1041 | {
1042 | "data": {
1043 | "text/html": [
1044 | "See the Pipeline job here."
1045 | ],
1046 | "text/plain": [
1047 | ""
1048 | ]
1049 | },
1050 | "metadata": {},
1051 | "output_type": "display_data"
1052 | }
1053 | ],
1054 | "source": [
1055 | "import json\n",
1056 | "from tfx.orchestration import data_types\n",
1057 | "\n",
1058 | "_ = pipelines_client.create_run_from_job_spec(\n",
1059 | " PIPELINE_DEFINITION_FILE,\n",
1060 | " enable_caching=False,\n",
1061 | " parameter_values={\n",
1062 | " \"input-config\": json.dumps(\n",
1063 | " {\n",
1064 | " \"splits\": [\n",
1065 | " {\"name\": \"train\", \"pattern\": \"span-[12]/train/*.tfrecord\"},\n",
1066 | " {\"name\": \"val\", \"pattern\": \"span-[12]/test/*.tfrecord\"},\n",
1067 | " ]\n",
1068 | " }\n",
1069 | " ),\n",
1070 | " \"output-config\": json.dumps({}),\n",
1071 | " },\n",
1072 | ")"
1073 | ]
1074 | }
1075 | ],
1076 | "metadata": {
1077 | "colab": {
1078 | "collapsed_sections": [],
1079 | "include_colab_link": true,
1080 | "machine_shape": "hm",
1081 | "name": "Custom_Model_TFX",
1082 | "provenance": []
1083 | },
1084 | "environment": {
1085 | "name": "tf2-gpu.2-4.mnightly-2021-02-02-debian-10-test",
1086 | "type": "gcloud",
1087 | "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-4:mnightly-2021-02-02-debian-10-test"
1088 | },
1089 | "kernelspec": {
1090 | "display_name": "Python 3 (ipykernel)",
1091 | "language": "python",
1092 | "name": "python3"
1093 | },
1094 | "language_info": {
1095 | "codemirror_mode": {
1096 | "name": "ipython",
1097 | "version": 3
1098 | },
1099 | "file_extension": ".py",
1100 | "mimetype": "text/x-python",
1101 | "name": "python",
1102 | "nbconvert_exporter": "python",
1103 | "pygments_lexer": "ipython3",
1104 | "version": "3.8.2"
1105 | }
1106 | },
1107 | "nbformat": 4,
1108 | "nbformat_minor": 1
1109 | }
1110 |
--------------------------------------------------------------------------------
/notebooks/01_Dataset_Prep.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "view-in-github"
8 | },
9 | "source": [
10 | "
"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {
16 | "id": "zyOcSJS29gkH"
17 | },
18 | "source": [
19 | "In this notebook, we will download the [CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html) dataset from [TensorFlow Datasets (TFDS)](https://www.tensorflow.org/datasets). The dataset is already prepared in TFRecord format.\n",
20 | "\n",
21 | "We will push the downloaded dataset to a GCS bucket while keeping the directory structure like below.\n",
22 | "- gs://bucket-name/span-1/train/train.tfrecord\n",
23 | "- gs://bucket-name/span-1/test/test.tfrecord\n",
24 | "\n",
25 | "To proceed with the rest of the notebook you'd need a billing-enabled GCP account. "
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {
31 | "id": "pJ2m7-bbxh4h"
32 | },
33 | "source": [
34 | "## Prerequisites\n",
35 | "- Add the following roles to IAM\n",
36 | " - Storage Object Admin\n",
37 | " - Storage Object Creator"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "metadata": {
43 | "id": "TRIz8jbQ-MUb"
44 | },
45 | "source": [
46 | "## Setup\n",
47 | "\n",
48 | "In order to access Google Cloud Platform from the Colab environment, we need to log in to a GCP account with the `gcloud init` command."
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 1,
54 | "metadata": {
55 | "colab": {
56 | "base_uri": "https://localhost:8080/"
57 | },
58 | "id": "lIYdn1woOS1n",
59 | "outputId": "69c253ee-ed9a-420b-9d71-8d89c5da8217"
60 | },
61 | "outputs": [
62 | {
63 | "name": "stdout",
64 | "output_type": "stream",
65 | "text": [
66 | "Welcome! This command will take you through the configuration of gcloud.\n",
67 | "\n",
68 | "Settings from your current configuration [default] are:\n",
69 | "component_manager:\n",
70 | " disable_update_check: 'True'\n",
71 | "compute:\n",
72 | " gce_metadata_read_timeout_sec: '0'\n",
73 | "\n",
74 | "Pick configuration to use:\n",
75 | " [1] Re-initialize this configuration [default] with new settings \n",
76 | " [2] Create a new configuration\n",
77 | "Please enter your numeric choice: 2\n",
78 | "\n",
79 | "Enter configuration name. Names start with a lower case letter and \n",
80 | "contain only lower case letters a-z, digits 0-9, and hyphens '-': gde\n",
81 | "Your current configuration has been set to: [gde]\n",
82 | "\n",
83 | "You can skip diagnostics next time by using the following flag:\n",
84 | " gcloud init --skip-diagnostics\n",
85 | "\n",
86 | "Network diagnostic detects and fixes local network connection issues.\n",
87 | "Reachability Check passed.\n",
88 | "Network diagnostic passed (1/1 checks passed).\n",
89 | "\n",
90 | "You must log in to continue. Would you like to log in (Y/n)? Y\n",
91 | "\n",
92 | "Go to the following link in your browser:\n",
93 | "\n",
94 | " https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=32555940559.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcloud-platform+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fappengine.admin+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcompute+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Faccounts.reauth&state=4OCQ4k8wDfa1lmE2eGQdWcKgfqIafI&prompt=consent&access_type=offline&code_challenge=2Mhw5bza8PZZdzFTIavmbxf4VapMEnPbNqgba2lk6kU&code_challenge_method=S256\n",
95 | "\n",
96 | "Enter verification code: 4/1AX4XfWidQVLj29O4VcpmmBw8NMEP7eVI4kNs2L76NERMizZMTHwC2vxPhec\n",
97 | "You are logged in as: [deep.diver.csp@gmail.com].\n",
98 | "\n",
99 | "Pick cloud project to use: \n",
100 | " [1] codelabs-temp\n",
101 | " [2] data-governance-tutorials\n",
102 | " [3] fast-ai-exploration\n",
103 | " [4] gcnresearch\n",
104 | " [5] gcp-ml-172005\n",
105 | " [6] gdeproj\n",
106 | " [7] gdeprojects\n",
107 | " [8] imrenagicom-support\n",
108 | " [9] jax-tpu-bfloat16\n",
109 | " [10] mobile-week-holder\n",
110 | " [11] my-dsc-solution-1\n",
111 | " [12] notional-cirrus-235403\n",
112 | " [13] sample-project-1-314222\n",
113 | " [14] satoluxx-gde\n",
114 | " [15] spreadsheet-api-sample\n",
115 | " [16] Create a new project\n",
116 | "Please enter numeric choice or text value (must exactly match list \n",
117 | "item): 5\n",
118 | "\n",
119 | "Your current project has been set to: [gcp-ml-172005].\n",
120 | "\n",
121 | "Do you want to configure a default Compute Region and Zone? (Y/n)? Y\n",
122 | "\n",
123 | "Which Google Compute Engine zone would you like to use as project \n",
124 | "default?\n",
125 | "If you do not specify a zone via a command line flag while working \n",
126 | "with Compute Engine resources, the default is assumed.\n",
127 | " [1] us-east1-b\n",
128 | " [2] us-east1-c\n",
129 | " [3] us-east1-d\n",
130 | " [4] us-east4-c\n",
131 | " [5] us-east4-b\n",
132 | " [6] us-east4-a\n",
133 | " [7] us-central1-c\n",
134 | " [8] us-central1-a\n",
135 | " [9] us-central1-f\n",
136 | " [10] us-central1-b\n",
137 | " [11] us-west1-b\n",
138 | " [12] us-west1-c\n",
139 | " [13] us-west1-a\n",
140 | " [14] europe-west4-a\n",
141 | " [15] europe-west4-b\n",
142 | " [16] europe-west4-c\n",
143 | " [17] europe-west1-b\n",
144 | " [18] europe-west1-d\n",
145 | " [19] europe-west1-c\n",
146 | " [20] europe-west3-c\n",
147 | " [21] europe-west3-a\n",
148 | " [22] europe-west3-b\n",
149 | " [23] europe-west2-c\n",
150 | " [24] europe-west2-b\n",
151 | " [25] europe-west2-a\n",
152 | " [26] asia-east1-b\n",
153 | " [27] asia-east1-a\n",
154 | " [28] asia-east1-c\n",
155 | " [29] asia-southeast1-b\n",
156 | " [30] asia-southeast1-a\n",
157 | " [31] asia-southeast1-c\n",
158 | " [32] asia-northeast1-b\n",
159 | " [33] asia-northeast1-c\n",
160 | " [34] asia-northeast1-a\n",
161 | " [35] asia-south1-c\n",
162 | " [36] asia-south1-b\n",
163 | " [37] asia-south1-a\n",
164 | " [38] australia-southeast1-b\n",
165 | " [39] australia-southeast1-c\n",
166 | " [40] australia-southeast1-a\n",
167 | " [41] southamerica-east1-b\n",
168 | " [42] southamerica-east1-c\n",
169 | " [43] southamerica-east1-a\n",
170 | " [44] asia-east2-a\n",
171 | " [45] asia-east2-b\n",
172 | " [46] asia-east2-c\n",
173 | " [47] asia-northeast2-a\n",
174 | " [48] asia-northeast2-b\n",
175 | " [49] asia-northeast2-c\n",
176 | " [50] asia-northeast3-a\n",
177 | "Did not print [36] options.\n",
178 | "Too many options [86]. Enter \"list\" at prompt to print choices fully.\n",
179 | "Please enter numeric choice or text value (must exactly match list \n",
180 | "item): 8\n",
181 | "\n",
182 | "Your project default Compute Engine zone has been set to [us-central1-a].\n",
183 | "You can change it by running [gcloud config set compute/zone NAME].\n",
184 | "\n",
185 | "Your project default Compute Engine region has been set to [us-central1].\n",
186 | "You can change it by running [gcloud config set compute/region NAME].\n",
187 | "\n",
188 | "Your Google Cloud SDK is configured and ready to use!\n",
189 | "\n",
190 | "* Commands that require authentication will use deep.diver.csp@gmail.com by default\n",
191 | "* Commands will reference project `gcp-ml-172005` by default\n",
192 | "* Compute Engine commands will use region `us-central1` by default\n",
193 | "* Compute Engine commands will use zone `us-central1-a` by default\n",
194 | "\n",
195 | "Run `gcloud help config` to learn how to change individual settings\n",
196 | "\n",
197 | "This gcloud configuration is called [gde]. You can create additional configurations if you work with multiple accounts and/or projects.\n",
198 | "Run `gcloud topic configurations` to learn more.\n",
199 | "\n",
200 | "Some things to try next:\n",
201 | "\n",
202 | "* Run `gcloud --help` to see the Cloud Platform services you can interact with. And run `gcloud help COMMAND` to get help on any gcloud command.\n",
203 | "* Run `gcloud topic --help` to learn about advanced features of the SDK like arg files and output formatting\n"
204 | ]
205 | }
206 | ],
207 | "source": [
208 | "!gcloud init"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": 2,
214 | "metadata": {
215 | "id": "mhDIpjq3yuGV"
216 | },
217 | "outputs": [],
218 | "source": [
219 | "from google.colab import auth\n",
220 | "\n",
221 | "auth.authenticate_user()"
222 | ]
223 | },
224 | {
225 | "cell_type": "markdown",
226 | "metadata": {
227 | "id": "bG-NwjjB-ioI"
228 | },
229 | "source": [
230 | "## Download the original dataset and copy over to a GCS Bucket"
231 | ]
232 | },
233 | {
234 | "cell_type": "markdown",
235 | "metadata": {
236 | "id": "FXvVVA_joPBL"
237 | },
238 | "source": [
239 | "### 1. Create Directories\n",
240 | "\n",
241 | "In this step, we create the directories that will hold the downloaded TFRecord dataset. In the initial phase, the training and testing datasets are stored in the `span-1/train` and `span-1/test` directories, respectively.\n",
242 | "\n",
243 | "When more data with the same distribution becomes available, we can update the currently stored dataset. In this case, you should turn on the [GCS's versioning feature](https://cloud.google.com/storage/docs/object-versioning).\n",
244 | "\n",
245 | "When more data with a different distribution becomes available, we will create additional directories, `span-2/train` and `span-2/test`, to address data drift. In this way, we can keep the data separate for easier maintenance while handling versioning separately for different `SPAN`s."
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": 3,
251 | "metadata": {
252 | "id": "XaAx0ZJ2QsGp"
253 | },
254 | "outputs": [],
255 | "source": [
256 | "TARGET_ROOT_DIR = \"cifar10\"\n",
257 | "TARGET_TRAIN_DIR = TARGET_ROOT_DIR + \"/span-1/train\"\n",
258 | "TARGET_TEST_DIR = TARGET_ROOT_DIR + \"/span-1/test\"\n",
259 | "\n",
260 | "!mkdir -p {TARGET_TRAIN_DIR}\n",
261 | "!mkdir -p {TARGET_TEST_DIR}"
262 | ]
263 | },
264 | {
265 | "cell_type": "markdown",
266 | "metadata": {
267 | "id": "qt5er4ywpfGv"
268 | },
269 | "source": [
270 | "### 2. Download CIFAR10 Dataset with TFDS"
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": 4,
276 | "metadata": {
277 | "colab": {
278 | "base_uri": "https://localhost:8080/",
279 | "height": 296,
280 | "referenced_widgets": [
281 | "fb0e11738e734dc6a7428c4a82e81705",
282 | "4f7c45939d3f49eb856ccc58f999e37b",
283 | "a327863b15a54d1eb8cd2c26ee31c826",
284 | "d46fa732fb474fce8f940c031ca06637",
285 | "fcf94990ac4f46b2a0fd54e06eb0f88b",
286 | "1a9b33aebfb444a9b3d9442a5d72cea2",
287 | "710373ad31fe4dc0b29c7c681e345cc8",
288 | "dbe66bad8ba94b30b93c7c9d25cf8fbd",
289 | "9613736bf85c44338d6f5609e6efc90b",
290 | "385a10a4ca644ca38db0f539be5a54d5",
291 | "ecefe53c759e4a54aa1f3ed44444fdf5",
292 | "4d9642439f084654b63b4e7bde24b89c",
293 | "fd5c76c7cda94b22814409cbda62a38d",
294 | "158453b7af414664b766d99ecbb9429e",
295 | "6bf37bc7f5684a8eb3dcf625d02616fd",
296 | "a7265046f01343a0a9de7635e60e2d5f",
297 | "b7e8c1bf6e394e61aae415a735b18ffe",
298 | "c8c9446c1a2f4a3785600ab596a451c9",
299 | "a9e7a7b675844f1ab616bb1972c15d32",
300 | "20d532a52ecd479fb7dfa9342decd2d1",
301 | "0b02c308f2604dd28dd5b430d6239253",
302 | "b515ef8f06744ce486f110326909e90e",
303 | "af4562715ddd41e786e717033a6f3fec",
304 | "5b27a0a8a6964be49befadfc9d209c93",
305 | "d0c79d32309640a18b1097e50c273489",
306 | "2f93c4391a204a26a8179b0a6b46a23e",
307 | "4fdde132c9744c74bbf7aa7a3185ae64",
308 | "6f08665adc814ac09e7cb21e0533866c",
309 | "fd5460933db44c92b60e41b065551215",
310 | "e3c232f427c649f2bf3f58ed97ad15d1",
311 | "29aa47ab203d4099aad31f79c72421f4",
312 | "5a452ead65af499bbf868b5314dd7196",
313 | "9b8d5063647d466684c6e1b196a90796"
314 | ]
315 | },
316 | "id": "dWUoW87xtuQ8",
317 | "outputId": "a6e61183-396a-4912-8f01-94f82185da15"
318 | },
319 | "outputs": [
320 | {
321 | "name": "stdout",
322 | "output_type": "stream",
323 | "text": [
324 | "\u001b[1mDownloading and preparing dataset cifar10/3.0.2 (download: 162.17 MiB, generated: 132.40 MiB, total: 294.58 MiB) to /root/tensorflow_datasets/cifar10/3.0.2...\u001b[0m\n"
325 | ]
326 | },
327 | {
328 | "data": {
329 | "application/vnd.jupyter.widget-view+json": {
330 | "model_id": "fb0e11738e734dc6a7428c4a82e81705",
331 | "version_major": 2,
332 | "version_minor": 0
333 | },
334 | "text/plain": [
335 | "Dl Completed...: 0 url [00:00, ? url/s]"
336 | ]
337 | },
338 | "metadata": {},
339 | "output_type": "display_data"
340 | },
341 | {
342 | "data": {
343 | "application/vnd.jupyter.widget-view+json": {
344 | "model_id": "4d9642439f084654b63b4e7bde24b89c",
345 | "version_major": 2,
346 | "version_minor": 0
347 | },
348 | "text/plain": [
349 | "Dl Size...: 0 MiB [00:00, ? MiB/s]"
350 | ]
351 | },
352 | "metadata": {},
353 | "output_type": "display_data"
354 | },
355 | {
356 | "data": {
357 | "application/vnd.jupyter.widget-view+json": {
358 | "model_id": "af4562715ddd41e786e717033a6f3fec",
359 | "version_major": 2,
360 | "version_minor": 0
361 | },
362 | "text/plain": [
363 | "Extraction completed...: 0 file [00:00, ? file/s]"
364 | ]
365 | },
366 | "metadata": {},
367 | "output_type": "display_data"
368 | },
369 | {
370 | "name": "stdout",
371 | "output_type": "stream",
372 | "text": [
373 | "\n",
374 | "\n",
375 | "\n"
376 | ]
377 | },
378 | {
379 | "data": {
380 | "application/vnd.jupyter.widget-view+json": {
381 | "model_id": "26d3cc1e67244584878097dd9b080a17",
382 | "version_major": 2,
383 | "version_minor": 0
384 | },
385 | "text/plain": [
386 | "0 examples [00:00, ? examples/s]"
387 | ]
388 | },
389 | "metadata": {},
390 | "output_type": "display_data"
391 | },
392 | {
393 | "name": "stdout",
394 | "output_type": "stream",
395 | "text": [
396 | "Shuffling and writing examples to /root/tensorflow_datasets/cifar10/3.0.2.incompleteYSSAF4/cifar10-train.tfrecord\n"
397 | ]
398 | },
399 | {
400 | "data": {
401 | "application/vnd.jupyter.widget-view+json": {
402 | "model_id": "c8243d1036a6458881205806c1e98661",
403 | "version_major": 2,
404 | "version_minor": 0
405 | },
406 | "text/plain": [
407 | " 0%| | 0/50000 [00:00, ? examples/s]"
408 | ]
409 | },
410 | "metadata": {},
411 | "output_type": "display_data"
412 | },
413 | {
414 | "data": {
415 | "application/vnd.jupyter.widget-view+json": {
416 | "model_id": "36dc8a6cbbf74faba3241968b910240d",
417 | "version_major": 2,
418 | "version_minor": 0
419 | },
420 | "text/plain": [
421 | "0 examples [00:00, ? examples/s]"
422 | ]
423 | },
424 | "metadata": {},
425 | "output_type": "display_data"
426 | },
427 | {
428 | "name": "stdout",
429 | "output_type": "stream",
430 | "text": [
431 | "Shuffling and writing examples to /root/tensorflow_datasets/cifar10/3.0.2.incompleteYSSAF4/cifar10-test.tfrecord\n"
432 | ]
433 | },
434 | {
435 | "data": {
436 | "application/vnd.jupyter.widget-view+json": {
437 | "model_id": "3c8a78451e4f44049c1106ad8c0c9f43",
438 | "version_major": 2,
439 | "version_minor": 0
440 | },
441 | "text/plain": [
442 | " 0%| | 0/10000 [00:00, ? examples/s]"
443 | ]
444 | },
445 | "metadata": {},
446 | "output_type": "display_data"
447 | },
448 | {
449 | "name": "stdout",
450 | "output_type": "stream",
451 | "text": [
452 | "\u001b[1mDataset cifar10 downloaded and prepared to /root/tensorflow_datasets/cifar10/3.0.2. Subsequent calls will reuse this data.\u001b[0m\n"
453 | ]
454 | }
455 | ],
456 | "source": [
457 | "import tensorflow_datasets as tfds\n",
458 | "\n",
459 | "# Generate TFRecords with TFDS\n",
460 | "builder = tfds.builder(\"cifar10\")\n",
461 | "builder.download_and_prepare()"
462 | ]
463 | },
464 | {
465 | "cell_type": "markdown",
466 | "metadata": {
467 | "id": "U3YidvVPppba"
468 | },
469 | "source": [
470 | "### 3. Copy Downloaded Dataset to the Directories that We have created"
471 | ]
472 | },
473 | {
474 | "cell_type": "code",
475 | "execution_count": 5,
476 | "metadata": {
477 | "id": "bXUmXk7DQUxS"
478 | },
479 | "outputs": [],
480 | "source": [
481 | "!cp {builder.data_dir}/cifar10-train.tfrecord-00000-of-00001 {TARGET_TRAIN_DIR}/cifar10-train.tfrecord\n",
482 | "!cp {builder.data_dir}/cifar10-test.tfrecord-00000-of-00001 {TARGET_TEST_DIR}/cifar10-test.tfrecord"
483 | ]
484 | },
485 | {
486 | "cell_type": "code",
487 | "execution_count": 6,
488 | "metadata": {
489 | "colab": {
490 | "base_uri": "https://localhost:8080/"
491 | },
492 | "id": "SkC_wyRYLWCT",
493 | "outputId": "f6c572c0-1f30-48e7-8fec-1a84fad4663b"
494 | },
495 | "outputs": [
496 | {
497 | "name": "stdout",
498 | "output_type": "stream",
499 | "text": [
500 | "cifar10:\n",
501 | "span-1\n",
502 | "\n",
503 | "cifar10/span-1:\n",
504 | "test train\n",
505 | "\n",
506 | "cifar10/span-1/test:\n",
507 | "cifar10-test.tfrecord\n",
508 | "\n",
509 | "cifar10/span-1/train:\n",
510 | "cifar10-train.tfrecord\n"
511 | ]
512 | }
513 | ],
514 | "source": [
515 | "!ls -R {TARGET_ROOT_DIR}"
516 | ]
517 | },
518 | {
519 | "cell_type": "markdown",
520 | "metadata": {
521 | "id": "U7nDmdxY-rFr"
522 | },
523 | "source": [
524 | "### 4. Copy Local Files to the GCS Bucket"
525 | ]
526 | },
527 | {
528 | "cell_type": "code",
529 | "execution_count": 7,
530 | "metadata": {
531 | "colab": {
532 | "base_uri": "https://localhost:8080/"
533 | },
534 | "id": "eqldz8Jkz8se",
535 | "outputId": "483033e6-efa1-4beb-c2e7-156469461b23"
536 | },
537 | "outputs": [
538 | {
539 | "name": "stdout",
540 | "output_type": "stream",
541 | "text": [
542 | "Creating gs://cifar10-csp-public2/...\n",
543 | "ServiceException: 409 A Cloud Storage bucket named 'cifar10-csp-public2' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.\n",
544 | "Copying file://cifar10/span-1/test/cifar10-test.tfrecord [Content-Type=application/octet-stream]...\n",
545 | "Copying file://cifar10/span-1/train/cifar10-train.tfrecord [Content-Type=application/octet-stream]...\n",
546 | "/\n",
547 | "Operation completed over 2 objects/133.3 MiB. \n"
548 | ]
549 | }
550 | ],
551 | "source": [
552 | "#@title GCS\n",
553 | "#@markdown You should change these values as per your preferences. The copy operation can take ~5 minutes. \n",
554 | "BUCKET_PATH = \"gs://cifar10-csp-public2\" #@param {type:\"string\"}\n",
555 | "REGION = \"us-central1\" #@param {type:\"string\"}\n",
556 | "\n",
557 | "!gsutil mb -l {REGION} {BUCKET_PATH}\n",
558 | "!gsutil -m cp -r {TARGET_ROOT_DIR}/* {BUCKET_PATH}"
559 | ]
560 | },
561 | {
562 | "cell_type": "markdown",
563 | "metadata": {
564 | "id": "UX7Gw2_h-4Pk"
565 | },
566 | "source": [
567 | "Verify if the files were copied over."
568 | ]
569 | },
570 | {
571 | "cell_type": "code",
572 | "execution_count": 8,
573 | "metadata": {
574 | "colab": {
575 | "base_uri": "https://localhost:8080/"
576 | },
577 | "id": "KNfP-pbowVkU",
578 | "outputId": "1f078b96-d70f-4ea7-c4f7-a1bd6d74d036"
579 | },
580 | "outputs": [
581 | {
582 | "name": "stdout",
583 | "output_type": "stream",
584 | "text": [
585 | "gs://cifar10-csp-public2/span-1/:\n",
586 | "\n",
587 | "gs://cifar10-csp-public2/span-1/test/:\n",
588 | "gs://cifar10-csp-public2/span-1/test/cifar10-test.tfrecord\n",
589 | "\n",
590 | "gs://cifar10-csp-public2/span-1/train/:\n",
591 | "gs://cifar10-csp-public2/span-1/train/cifar10-train.tfrecord\n"
592 | ]
593 | }
594 | ],
595 | "source": [
596 | "!gsutil ls -R {BUCKET_PATH}/"
597 | ]
598 | },
599 | {
600 | "cell_type": "markdown",
601 | "metadata": {
602 | "id": "1sVvSU4Alh-J"
603 | },
604 | "source": [
605 | "# Test with TFX's built-in function\n",
606 | "\n",
607 | "TFX provides the [`calculate_splits_fingerprint_span_and_version`](https://github.com/tensorflow/tfx/blob/00571387b7b006e2ebb0c1277380e5a47d8f0ffa/tfx/components/example_gen/utils.py#L648) function, which calculates and returns the current `SPAN` and `VERSION`.\n",
608 | "\n",
609 | "> Please note that this section only works within the GCP Vertex Notebook environment due to an authentication issue. If you know how to set up GCS access privileges for TFX, please let me know."
610 | ]
611 | },
612 | {
613 | "cell_type": "code",
614 | "execution_count": null,
615 | "metadata": {
616 | "id": "Dw8IvNVRllqI"
617 | },
618 | "outputs": [],
619 | "source": [
620 | "!pip install tfx==1.2.0"
621 | ]
622 | },
623 | {
624 | "cell_type": "code",
625 | "execution_count": null,
626 | "metadata": {
627 | "id": "Xi8xgC8wZqVD"
628 | },
629 | "outputs": [],
630 | "source": [
631 | "from tfx import v1 as tfx\n",
632 | "from tfx.components.example_gen import utils"
633 | ]
634 | },
635 | {
636 | "cell_type": "code",
637 | "execution_count": null,
638 | "metadata": {
639 | "id": "Ma2QzXrEZuVX"
640 | },
641 | "outputs": [],
642 | "source": [
643 | "from tfx.proto import example_gen_pb2\n",
644 | "\n",
645 | "_DATA_PATH = \"gs://cifar10-csp-public\"\n",
646 | "\n",
647 | "splits = [\n",
648 | " example_gen_pb2.Input.Split(name=\"train\", pattern=\"span-{SPAN}/train/*\"),\n",
649 | " example_gen_pb2.Input.Split(name=\"val\", pattern=\"span-{SPAN}/test/*\"),\n",
650 | "]\n",
651 | "\n",
652 | "_, span, version = utils.calculate_splits_fingerprint_span_and_version(\n",
653 | " _DATA_PATH, splits\n",
654 | ")"
655 | ]
656 | },
657 | {
658 | "cell_type": "code",
659 | "execution_count": null,
660 | "metadata": {
661 | "colab": {
662 | "base_uri": "https://localhost:8080/"
663 | },
664 | "id": "UZFygbeUaccg",
665 | "outputId": "06a298da-7509-48c8-9ee9-58c3dc320f5b"
666 | },
667 | "outputs": [
668 | {
669 | "data": {
670 | "text/plain": [
671 | "(1, None)"
672 | ]
673 | },
674 | "execution_count": 8,
675 | "metadata": {},
676 | "output_type": "execute_result"
677 | }
678 | ],
679 | "source": [
680 | "span, version"
681 | ]
682 | }
683 | ],
684 | "metadata": {
685 | "colab": {
686 | "include_colab_link": true,
687 | "name": "Dataset_Prep",
688 | "provenance": []
689 | },
690 | "kernelspec": {
691 | "display_name": "Python 3 (ipykernel)",
692 | "language": "python",
693 | "name": "python3"
694 | },
695 | "language_info": {
696 | "codemirror_mode": {
697 | "name": "ipython",
698 | "version": 3
699 | },
700 | "file_extension": ".py",
701 | "mimetype": "text/x-python",
702 | "name": "python",
703 | "nbconvert_exporter": "python",
704 | "pygments_lexer": "ipython3",
705 | "version": "3.8.2"
706 | },
707 | "widgets": {
708 | "application/vnd.jupyter.widget-state+json": {
709 | "0b02c308f2604dd28dd5b430d6239253": {
710 | "model_module": "@jupyter-widgets/controls",
711 | "model_module_version": "1.5.0",
712 | "model_name": "DescriptionStyleModel",
713 | "state": {
714 | "_model_module": "@jupyter-widgets/controls",
715 | "_model_module_version": "1.5.0",
716 | "_model_name": "DescriptionStyleModel",
717 | "_view_count": null,
718 | "_view_module": "@jupyter-widgets/base",
719 | "_view_module_version": "1.2.0",
720 | "_view_name": "StyleView",
721 | "description_width": ""
722 | }
723 | },
724 | "158453b7af414664b766d99ecbb9429e": {
725 | "model_module": "@jupyter-widgets/controls",
726 | "model_module_version": "1.5.0",
727 | "model_name": "HTMLModel",
728 | "state": {
729 | "_dom_classes": [],
730 | "_model_module": "@jupyter-widgets/controls",
731 | "_model_module_version": "1.5.0",
732 | "_model_name": "HTMLModel",
733 | "_view_count": null,
734 | "_view_module": "@jupyter-widgets/controls",
735 | "_view_module_version": "1.5.0",
736 | "_view_name": "HTMLView",
737 | "description": "",
738 | "description_tooltip": null,
739 | "layout": "IPY_MODEL_c8c9446c1a2f4a3785600ab596a451c9",
740 | "placeholder": "",
741 | "style": "IPY_MODEL_b7e8c1bf6e394e61aae415a735b18ffe",
742 | "value": "Dl Size...: 100%"
743 | }
744 | },
745 | "1a9b33aebfb444a9b3d9442a5d72cea2": {
746 | "model_module": "@jupyter-widgets/controls",
747 | "model_module_version": "1.5.0",
748 | "model_name": "DescriptionStyleModel",
749 | "state": {
750 | "_model_module": "@jupyter-widgets/controls",
751 | "_model_module_version": "1.5.0",
752 | "_model_name": "DescriptionStyleModel",
753 | "_view_count": null,
754 | "_view_module": "@jupyter-widgets/base",
755 | "_view_module_version": "1.2.0",
756 | "_view_name": "StyleView",
757 | "description_width": ""
758 | }
759 | },
760 | "20d532a52ecd479fb7dfa9342decd2d1": {
761 | "model_module": "@jupyter-widgets/base",
762 | "model_module_version": "1.2.0",
763 | "model_name": "LayoutModel",
764 | "state": {
765 | "_model_module": "@jupyter-widgets/base",
766 | "_model_module_version": "1.2.0",
767 | "_model_name": "LayoutModel",
768 | "_view_count": null,
769 | "_view_module": "@jupyter-widgets/base",
770 | "_view_module_version": "1.2.0",
771 | "_view_name": "LayoutView",
772 | "align_content": null,
773 | "align_items": null,
774 | "align_self": null,
775 | "border": null,
776 | "bottom": null,
777 | "display": null,
778 | "flex": null,
779 | "flex_flow": null,
780 | "grid_area": null,
781 | "grid_auto_columns": null,
782 | "grid_auto_flow": null,
783 | "grid_auto_rows": null,
784 | "grid_column": null,
785 | "grid_gap": null,
786 | "grid_row": null,
787 | "grid_template_areas": null,
788 | "grid_template_columns": null,
789 | "grid_template_rows": null,
790 | "height": null,
791 | "justify_content": null,
792 | "justify_items": null,
793 | "left": null,
794 | "margin": null,
795 | "max_height": null,
796 | "max_width": null,
797 | "min_height": null,
798 | "min_width": null,
799 | "object_fit": null,
800 | "object_position": null,
801 | "order": null,
802 | "overflow": null,
803 | "overflow_x": null,
804 | "overflow_y": null,
805 | "padding": null,
806 | "right": null,
807 | "top": null,
808 | "visibility": null,
809 | "width": "20px"
810 | }
811 | },
812 | "29aa47ab203d4099aad31f79c72421f4": {
813 | "model_module": "@jupyter-widgets/base",
814 | "model_module_version": "1.2.0",
815 | "model_name": "LayoutModel",
816 | "state": {
817 | "_model_module": "@jupyter-widgets/base",
818 | "_model_module_version": "1.2.0",
819 | "_model_name": "LayoutModel",
820 | "_view_count": null,
821 | "_view_module": "@jupyter-widgets/base",
822 | "_view_module_version": "1.2.0",
823 | "_view_name": "LayoutView",
824 | "align_content": null,
825 | "align_items": null,
826 | "align_self": null,
827 | "border": null,
828 | "bottom": null,
829 | "display": null,
830 | "flex": null,
831 | "flex_flow": null,
832 | "grid_area": null,
833 | "grid_auto_columns": null,
834 | "grid_auto_flow": null,
835 | "grid_auto_rows": null,
836 | "grid_column": null,
837 | "grid_gap": null,
838 | "grid_row": null,
839 | "grid_template_areas": null,
840 | "grid_template_columns": null,
841 | "grid_template_rows": null,
842 | "height": null,
843 | "justify_content": null,
844 | "justify_items": null,
845 | "left": null,
846 | "margin": null,
847 | "max_height": null,
848 | "max_width": null,
849 | "min_height": null,
850 | "min_width": null,
851 | "object_fit": null,
852 | "object_position": null,
853 | "order": null,
854 | "overflow": null,
855 | "overflow_x": null,
856 | "overflow_y": null,
857 | "padding": null,
858 | "right": null,
859 | "top": null,
860 | "visibility": null,
861 | "width": "20px"
862 | }
863 | },
864 | "2f93c4391a204a26a8179b0a6b46a23e": {
865 | "model_module": "@jupyter-widgets/controls",
866 | "model_module_version": "1.5.0",
867 | "model_name": "FloatProgressModel",
868 | "state": {
869 | "_dom_classes": [],
870 | "_model_module": "@jupyter-widgets/controls",
871 | "_model_module_version": "1.5.0",
872 | "_model_name": "FloatProgressModel",
873 | "_view_count": null,
874 | "_view_module": "@jupyter-widgets/controls",
875 | "_view_module_version": "1.5.0",
876 | "_view_name": "ProgressView",
877 | "bar_style": "success",
878 | "description": "",
879 | "description_tooltip": null,
880 | "layout": "IPY_MODEL_29aa47ab203d4099aad31f79c72421f4",
881 | "max": 1,
882 | "min": 0,
883 | "orientation": "horizontal",
884 | "style": "IPY_MODEL_e3c232f427c649f2bf3f58ed97ad15d1",
885 | "value": 1
886 | }
887 | },
888 | "385a10a4ca644ca38db0f539be5a54d5": {
889 | "model_module": "@jupyter-widgets/controls",
890 | "model_module_version": "1.5.0",
891 | "model_name": "DescriptionStyleModel",
892 | "state": {
893 | "_model_module": "@jupyter-widgets/controls",
894 | "_model_module_version": "1.5.0",
895 | "_model_name": "DescriptionStyleModel",
896 | "_view_count": null,
897 | "_view_module": "@jupyter-widgets/base",
898 | "_view_module_version": "1.2.0",
899 | "_view_name": "StyleView",
900 | "description_width": ""
901 | }
902 | },
903 | "4d9642439f084654b63b4e7bde24b89c": {
904 | "model_module": "@jupyter-widgets/controls",
905 | "model_module_version": "1.5.0",
906 | "model_name": "HBoxModel",
907 | "state": {
908 | "_dom_classes": [],
909 | "_model_module": "@jupyter-widgets/controls",
910 | "_model_module_version": "1.5.0",
911 | "_model_name": "HBoxModel",
912 | "_view_count": null,
913 | "_view_module": "@jupyter-widgets/controls",
914 | "_view_module_version": "1.5.0",
915 | "_view_name": "HBoxView",
916 | "box_style": "",
917 | "children": [
918 | "IPY_MODEL_158453b7af414664b766d99ecbb9429e",
919 | "IPY_MODEL_6bf37bc7f5684a8eb3dcf625d02616fd",
920 | "IPY_MODEL_a7265046f01343a0a9de7635e60e2d5f"
921 | ],
922 | "layout": "IPY_MODEL_fd5c76c7cda94b22814409cbda62a38d"
923 | }
924 | },
925 | "4f7c45939d3f49eb856ccc58f999e37b": {
926 | "model_module": "@jupyter-widgets/base",
927 | "model_module_version": "1.2.0",
928 | "model_name": "LayoutModel",
929 | "state": {
930 | "_model_module": "@jupyter-widgets/base",
931 | "_model_module_version": "1.2.0",
932 | "_model_name": "LayoutModel",
933 | "_view_count": null,
934 | "_view_module": "@jupyter-widgets/base",
935 | "_view_module_version": "1.2.0",
936 | "_view_name": "LayoutView",
937 | "align_content": null,
938 | "align_items": null,
939 | "align_self": null,
940 | "border": null,
941 | "bottom": null,
942 | "display": null,
943 | "flex": null,
944 | "flex_flow": null,
945 | "grid_area": null,
946 | "grid_auto_columns": null,
947 | "grid_auto_flow": null,
948 | "grid_auto_rows": null,
949 | "grid_column": null,
950 | "grid_gap": null,
951 | "grid_row": null,
952 | "grid_template_areas": null,
953 | "grid_template_columns": null,
954 | "grid_template_rows": null,
955 | "height": null,
956 | "justify_content": null,
957 | "justify_items": null,
958 | "left": null,
959 | "margin": null,
960 | "max_height": null,
961 | "max_width": null,
962 | "min_height": null,
963 | "min_width": null,
964 | "object_fit": null,
965 | "object_position": null,
966 | "order": null,
967 | "overflow": null,
968 | "overflow_x": null,
969 | "overflow_y": null,
970 | "padding": null,
971 | "right": null,
972 | "top": null,
973 | "visibility": null,
974 | "width": null
975 | }
976 | },
977 | "4fdde132c9744c74bbf7aa7a3185ae64": {
978 | "model_module": "@jupyter-widgets/controls",
979 | "model_module_version": "1.5.0",
980 | "model_name": "HTMLModel",
981 | "state": {
982 | "_dom_classes": [],
983 | "_model_module": "@jupyter-widgets/controls",
984 | "_model_module_version": "1.5.0",
985 | "_model_name": "HTMLModel",
986 | "_view_count": null,
987 | "_view_module": "@jupyter-widgets/controls",
988 | "_view_module_version": "1.5.0",
989 | "_view_name": "HTMLView",
990 | "description": "",
991 | "description_tooltip": null,
992 | "layout": "IPY_MODEL_9b8d5063647d466684c6e1b196a90796",
993 | "placeholder": "",
994 | "style": "IPY_MODEL_5a452ead65af499bbf868b5314dd7196",
995 | "value": " 1/1 [00:14<00:00, 14.94s/ file]"
996 | }
997 | },
998 | "5a452ead65af499bbf868b5314dd7196": {
999 | "model_module": "@jupyter-widgets/controls",
1000 | "model_module_version": "1.5.0",
1001 | "model_name": "DescriptionStyleModel",
1002 | "state": {
1003 | "_model_module": "@jupyter-widgets/controls",
1004 | "_model_module_version": "1.5.0",
1005 | "_model_name": "DescriptionStyleModel",
1006 | "_view_count": null,
1007 | "_view_module": "@jupyter-widgets/base",
1008 | "_view_module_version": "1.2.0",
1009 | "_view_name": "StyleView",
1010 | "description_width": ""
1011 | }
1012 | },
1013 | "5b27a0a8a6964be49befadfc9d209c93": {
1014 | "model_module": "@jupyter-widgets/base",
1015 | "model_module_version": "1.2.0",
1016 | "model_name": "LayoutModel",
1017 | "state": {
1018 | "_model_module": "@jupyter-widgets/base",
1019 | "_model_module_version": "1.2.0",
1020 | "_model_name": "LayoutModel",
1021 | "_view_count": null,
1022 | "_view_module": "@jupyter-widgets/base",
1023 | "_view_module_version": "1.2.0",
1024 | "_view_name": "LayoutView",
1025 | "align_content": null,
1026 | "align_items": null,
1027 | "align_self": null,
1028 | "border": null,
1029 | "bottom": null,
1030 | "display": null,
1031 | "flex": null,
1032 | "flex_flow": null,
1033 | "grid_area": null,
1034 | "grid_auto_columns": null,
1035 | "grid_auto_flow": null,
1036 | "grid_auto_rows": null,
1037 | "grid_column": null,
1038 | "grid_gap": null,
1039 | "grid_row": null,
1040 | "grid_template_areas": null,
1041 | "grid_template_columns": null,
1042 | "grid_template_rows": null,
1043 | "height": null,
1044 | "justify_content": null,
1045 | "justify_items": null,
1046 | "left": null,
1047 | "margin": null,
1048 | "max_height": null,
1049 | "max_width": null,
1050 | "min_height": null,
1051 | "min_width": null,
1052 | "object_fit": null,
1053 | "object_position": null,
1054 | "order": null,
1055 | "overflow": null,
1056 | "overflow_x": null,
1057 | "overflow_y": null,
1058 | "padding": null,
1059 | "right": null,
1060 | "top": null,
1061 | "visibility": null,
1062 | "width": null
1063 | }
1064 | },
1065 | "6bf37bc7f5684a8eb3dcf625d02616fd": {
1066 | "model_module": "@jupyter-widgets/controls",
1067 | "model_module_version": "1.5.0",
1068 | "model_name": "FloatProgressModel",
1069 | "state": {
1070 | "_dom_classes": [],
1071 | "_model_module": "@jupyter-widgets/controls",
1072 | "_model_module_version": "1.5.0",
1073 | "_model_name": "FloatProgressModel",
1074 | "_view_count": null,
1075 | "_view_module": "@jupyter-widgets/controls",
1076 | "_view_module_version": "1.5.0",
1077 | "_view_name": "ProgressView",
1078 | "bar_style": "success",
1079 | "description": "",
1080 | "description_tooltip": null,
1081 | "layout": "IPY_MODEL_20d532a52ecd479fb7dfa9342decd2d1",
1082 | "max": 1,
1083 | "min": 0,
1084 | "orientation": "horizontal",
1085 | "style": "IPY_MODEL_a9e7a7b675844f1ab616bb1972c15d32",
1086 | "value": 1
1087 | }
1088 | },
1089 | "6f08665adc814ac09e7cb21e0533866c": {
1090 | "model_module": "@jupyter-widgets/controls",
1091 | "model_module_version": "1.5.0",
1092 | "model_name": "DescriptionStyleModel",
1093 | "state": {
1094 | "_model_module": "@jupyter-widgets/controls",
1095 | "_model_module_version": "1.5.0",
1096 | "_model_name": "DescriptionStyleModel",
1097 | "_view_count": null,
1098 | "_view_module": "@jupyter-widgets/base",
1099 | "_view_module_version": "1.2.0",
1100 | "_view_name": "StyleView",
1101 | "description_width": ""
1102 | }
1103 | },
1104 | "710373ad31fe4dc0b29c7c681e345cc8": {
1105 | "model_module": "@jupyter-widgets/base",
1106 | "model_module_version": "1.2.0",
1107 | "model_name": "LayoutModel",
1108 | "state": {
1109 | "_model_module": "@jupyter-widgets/base",
1110 | "_model_module_version": "1.2.0",
1111 | "_model_name": "LayoutModel",
1112 | "_view_count": null,
1113 | "_view_module": "@jupyter-widgets/base",
1114 | "_view_module_version": "1.2.0",
1115 | "_view_name": "LayoutView",
1116 | "align_content": null,
1117 | "align_items": null,
1118 | "align_self": null,
1119 | "border": null,
1120 | "bottom": null,
1121 | "display": null,
1122 | "flex": null,
1123 | "flex_flow": null,
1124 | "grid_area": null,
1125 | "grid_auto_columns": null,
1126 | "grid_auto_flow": null,
1127 | "grid_auto_rows": null,
1128 | "grid_column": null,
1129 | "grid_gap": null,
1130 | "grid_row": null,
1131 | "grid_template_areas": null,
1132 | "grid_template_columns": null,
1133 | "grid_template_rows": null,
1134 | "height": null,
1135 | "justify_content": null,
1136 | "justify_items": null,
1137 | "left": null,
1138 | "margin": null,
1139 | "max_height": null,
1140 | "max_width": null,
1141 | "min_height": null,
1142 | "min_width": null,
1143 | "object_fit": null,
1144 | "object_position": null,
1145 | "order": null,
1146 | "overflow": null,
1147 | "overflow_x": null,
1148 | "overflow_y": null,
1149 | "padding": null,
1150 | "right": null,
1151 | "top": null,
1152 | "visibility": null,
1153 | "width": null
1154 | }
1155 | },
1156 | "9613736bf85c44338d6f5609e6efc90b": {
1157 | "model_module": "@jupyter-widgets/base",
1158 | "model_module_version": "1.2.0",
1159 | "model_name": "LayoutModel",
1160 | "state": {
1161 | "_model_module": "@jupyter-widgets/base",
1162 | "_model_module_version": "1.2.0",
1163 | "_model_name": "LayoutModel",
1164 | "_view_count": null,
1165 | "_view_module": "@jupyter-widgets/base",
1166 | "_view_module_version": "1.2.0",
1167 | "_view_name": "LayoutView",
1168 | "align_content": null,
1169 | "align_items": null,
1170 | "align_self": null,
1171 | "border": null,
1172 | "bottom": null,
1173 | "display": null,
1174 | "flex": null,
1175 | "flex_flow": null,
1176 | "grid_area": null,
1177 | "grid_auto_columns": null,
1178 | "grid_auto_flow": null,
1179 | "grid_auto_rows": null,
1180 | "grid_column": null,
1181 | "grid_gap": null,
1182 | "grid_row": null,
1183 | "grid_template_areas": null,
1184 | "grid_template_columns": null,
1185 | "grid_template_rows": null,
1186 | "height": null,
1187 | "justify_content": null,
1188 | "justify_items": null,
1189 | "left": null,
1190 | "margin": null,
1191 | "max_height": null,
1192 | "max_width": null,
1193 | "min_height": null,
1194 | "min_width": null,
1195 | "object_fit": null,
1196 | "object_position": null,
1197 | "order": null,
1198 | "overflow": null,
1199 | "overflow_x": null,
1200 | "overflow_y": null,
1201 | "padding": null,
1202 | "right": null,
1203 | "top": null,
1204 | "visibility": null,
1205 | "width": "20px"
1206 | }
1207 | },
1208 | "9b8d5063647d466684c6e1b196a90796": {
1209 | "model_module": "@jupyter-widgets/base",
1210 | "model_module_version": "1.2.0",
1211 | "model_name": "LayoutModel",
1212 | "state": {
1213 | "_model_module": "@jupyter-widgets/base",
1214 | "_model_module_version": "1.2.0",
1215 | "_model_name": "LayoutModel",
1216 | "_view_count": null,
1217 | "_view_module": "@jupyter-widgets/base",
1218 | "_view_module_version": "1.2.0",
1219 | "_view_name": "LayoutView",
1220 | "align_content": null,
1221 | "align_items": null,
1222 | "align_self": null,
1223 | "border": null,
1224 | "bottom": null,
1225 | "display": null,
1226 | "flex": null,
1227 | "flex_flow": null,
1228 | "grid_area": null,
1229 | "grid_auto_columns": null,
1230 | "grid_auto_flow": null,
1231 | "grid_auto_rows": null,
1232 | "grid_column": null,
1233 | "grid_gap": null,
1234 | "grid_row": null,
1235 | "grid_template_areas": null,
1236 | "grid_template_columns": null,
1237 | "grid_template_rows": null,
1238 | "height": null,
1239 | "justify_content": null,
1240 | "justify_items": null,
1241 | "left": null,
1242 | "margin": null,
1243 | "max_height": null,
1244 | "max_width": null,
1245 | "min_height": null,
1246 | "min_width": null,
1247 | "object_fit": null,
1248 | "object_position": null,
1249 | "order": null,
1250 | "overflow": null,
1251 | "overflow_x": null,
1252 | "overflow_y": null,
1253 | "padding": null,
1254 | "right": null,
1255 | "top": null,
1256 | "visibility": null,
1257 | "width": null
1258 | }
1259 | },
1260 | "a327863b15a54d1eb8cd2c26ee31c826": {
1261 | "model_module": "@jupyter-widgets/controls",
1262 | "model_module_version": "1.5.0",
1263 | "model_name": "HTMLModel",
1264 | "state": {
1265 | "_dom_classes": [],
1266 | "_model_module": "@jupyter-widgets/controls",
1267 | "_model_module_version": "1.5.0",
1268 | "_model_name": "HTMLModel",
1269 | "_view_count": null,
1270 | "_view_module": "@jupyter-widgets/controls",
1271 | "_view_module_version": "1.5.0",
1272 | "_view_name": "HTMLView",
1273 | "description": "",
1274 | "description_tooltip": null,
1275 | "layout": "IPY_MODEL_710373ad31fe4dc0b29c7c681e345cc8",
1276 | "placeholder": "",
1277 | "style": "IPY_MODEL_1a9b33aebfb444a9b3d9442a5d72cea2",
1278 | "value": "Dl Completed...: 100%"
1279 | }
1280 | },
1281 | "a7265046f01343a0a9de7635e60e2d5f": {
1282 | "model_module": "@jupyter-widgets/controls",
1283 | "model_module_version": "1.5.0",
1284 | "model_name": "HTMLModel",
1285 | "state": {
1286 | "_dom_classes": [],
1287 | "_model_module": "@jupyter-widgets/controls",
1288 | "_model_module_version": "1.5.0",
1289 | "_model_name": "HTMLModel",
1290 | "_view_count": null,
1291 | "_view_module": "@jupyter-widgets/controls",
1292 | "_view_module_version": "1.5.0",
1293 | "_view_name": "HTMLView",
1294 | "description": "",
1295 | "description_tooltip": null,
1296 | "layout": "IPY_MODEL_b515ef8f06744ce486f110326909e90e",
1297 | "placeholder": "",
1298 | "style": "IPY_MODEL_0b02c308f2604dd28dd5b430d6239253",
1299 | "value": " 162/162 [00:14<00:00, 15.22 MiB/s]"
1300 | }
1301 | },
1302 | "a9e7a7b675844f1ab616bb1972c15d32": {
1303 | "model_module": "@jupyter-widgets/controls",
1304 | "model_module_version": "1.5.0",
1305 | "model_name": "ProgressStyleModel",
1306 | "state": {
1307 | "_model_module": "@jupyter-widgets/controls",
1308 | "_model_module_version": "1.5.0",
1309 | "_model_name": "ProgressStyleModel",
1310 | "_view_count": null,
1311 | "_view_module": "@jupyter-widgets/base",
1312 | "_view_module_version": "1.2.0",
1313 | "_view_name": "StyleView",
1314 | "bar_color": null,
1315 | "description_width": ""
1316 | }
1317 | },
1318 | "af4562715ddd41e786e717033a6f3fec": {
1319 | "model_module": "@jupyter-widgets/controls",
1320 | "model_module_version": "1.5.0",
1321 | "model_name": "HBoxModel",
1322 | "state": {
1323 | "_dom_classes": [],
1324 | "_model_module": "@jupyter-widgets/controls",
1325 | "_model_module_version": "1.5.0",
1326 | "_model_name": "HBoxModel",
1327 | "_view_count": null,
1328 | "_view_module": "@jupyter-widgets/controls",
1329 | "_view_module_version": "1.5.0",
1330 | "_view_name": "HBoxView",
1331 | "box_style": "",
1332 | "children": [
1333 | "IPY_MODEL_d0c79d32309640a18b1097e50c273489",
1334 | "IPY_MODEL_2f93c4391a204a26a8179b0a6b46a23e",
1335 | "IPY_MODEL_4fdde132c9744c74bbf7aa7a3185ae64"
1336 | ],
1337 | "layout": "IPY_MODEL_5b27a0a8a6964be49befadfc9d209c93"
1338 | }
1339 | },
1340 | "b515ef8f06744ce486f110326909e90e": {
1341 | "model_module": "@jupyter-widgets/base",
1342 | "model_module_version": "1.2.0",
1343 | "model_name": "LayoutModel",
1344 | "state": {
1345 | "_model_module": "@jupyter-widgets/base",
1346 | "_model_module_version": "1.2.0",
1347 | "_model_name": "LayoutModel",
1348 | "_view_count": null,
1349 | "_view_module": "@jupyter-widgets/base",
1350 | "_view_module_version": "1.2.0",
1351 | "_view_name": "LayoutView",
1352 | "align_content": null,
1353 | "align_items": null,
1354 | "align_self": null,
1355 | "border": null,
1356 | "bottom": null,
1357 | "display": null,
1358 | "flex": null,
1359 | "flex_flow": null,
1360 | "grid_area": null,
1361 | "grid_auto_columns": null,
1362 | "grid_auto_flow": null,
1363 | "grid_auto_rows": null,
1364 | "grid_column": null,
1365 | "grid_gap": null,
1366 | "grid_row": null,
1367 | "grid_template_areas": null,
1368 | "grid_template_columns": null,
1369 | "grid_template_rows": null,
1370 | "height": null,
1371 | "justify_content": null,
1372 | "justify_items": null,
1373 | "left": null,
1374 | "margin": null,
1375 | "max_height": null,
1376 | "max_width": null,
1377 | "min_height": null,
1378 | "min_width": null,
1379 | "object_fit": null,
1380 | "object_position": null,
1381 | "order": null,
1382 | "overflow": null,
1383 | "overflow_x": null,
1384 | "overflow_y": null,
1385 | "padding": null,
1386 | "right": null,
1387 | "top": null,
1388 | "visibility": null,
1389 | "width": null
1390 | }
1391 | },
1392 | "b7e8c1bf6e394e61aae415a735b18ffe": {
1393 | "model_module": "@jupyter-widgets/controls",
1394 | "model_module_version": "1.5.0",
1395 | "model_name": "DescriptionStyleModel",
1396 | "state": {
1397 | "_model_module": "@jupyter-widgets/controls",
1398 | "_model_module_version": "1.5.0",
1399 | "_model_name": "DescriptionStyleModel",
1400 | "_view_count": null,
1401 | "_view_module": "@jupyter-widgets/base",
1402 | "_view_module_version": "1.2.0",
1403 | "_view_name": "StyleView",
1404 | "description_width": ""
1405 | }
1406 | },
1407 | "c8c9446c1a2f4a3785600ab596a451c9": {
1408 | "model_module": "@jupyter-widgets/base",
1409 | "model_module_version": "1.2.0",
1410 | "model_name": "LayoutModel",
1411 | "state": {
1412 | "_model_module": "@jupyter-widgets/base",
1413 | "_model_module_version": "1.2.0",
1414 | "_model_name": "LayoutModel",
1415 | "_view_count": null,
1416 | "_view_module": "@jupyter-widgets/base",
1417 | "_view_module_version": "1.2.0",
1418 | "_view_name": "LayoutView",
1419 | "align_content": null,
1420 | "align_items": null,
1421 | "align_self": null,
1422 | "border": null,
1423 | "bottom": null,
1424 | "display": null,
1425 | "flex": null,
1426 | "flex_flow": null,
1427 | "grid_area": null,
1428 | "grid_auto_columns": null,
1429 | "grid_auto_flow": null,
1430 | "grid_auto_rows": null,
1431 | "grid_column": null,
1432 | "grid_gap": null,
1433 | "grid_row": null,
1434 | "grid_template_areas": null,
1435 | "grid_template_columns": null,
1436 | "grid_template_rows": null,
1437 | "height": null,
1438 | "justify_content": null,
1439 | "justify_items": null,
1440 | "left": null,
1441 | "margin": null,
1442 | "max_height": null,
1443 | "max_width": null,
1444 | "min_height": null,
1445 | "min_width": null,
1446 | "object_fit": null,
1447 | "object_position": null,
1448 | "order": null,
1449 | "overflow": null,
1450 | "overflow_x": null,
1451 | "overflow_y": null,
1452 | "padding": null,
1453 | "right": null,
1454 | "top": null,
1455 | "visibility": null,
1456 | "width": null
1457 | }
1458 | },
1459 | "d0c79d32309640a18b1097e50c273489": {
1460 | "model_module": "@jupyter-widgets/controls",
1461 | "model_module_version": "1.5.0",
1462 | "model_name": "HTMLModel",
1463 | "state": {
1464 | "_dom_classes": [],
1465 | "_model_module": "@jupyter-widgets/controls",
1466 | "_model_module_version": "1.5.0",
1467 | "_model_name": "HTMLModel",
1468 | "_view_count": null,
1469 | "_view_module": "@jupyter-widgets/controls",
1470 | "_view_module_version": "1.5.0",
1471 | "_view_name": "HTMLView",
1472 | "description": "",
1473 | "description_tooltip": null,
1474 | "layout": "IPY_MODEL_fd5460933db44c92b60e41b065551215",
1475 | "placeholder": "",
1476 | "style": "IPY_MODEL_6f08665adc814ac09e7cb21e0533866c",
1477 | "value": "Extraction completed...: 100%"
1478 | }
1479 | },
1480 | "d46fa732fb474fce8f940c031ca06637": {
1481 | "model_module": "@jupyter-widgets/controls",
1482 | "model_module_version": "1.5.0",
1483 | "model_name": "FloatProgressModel",
1484 | "state": {
1485 | "_dom_classes": [],
1486 | "_model_module": "@jupyter-widgets/controls",
1487 | "_model_module_version": "1.5.0",
1488 | "_model_name": "FloatProgressModel",
1489 | "_view_count": null,
1490 | "_view_module": "@jupyter-widgets/controls",
1491 | "_view_module_version": "1.5.0",
1492 | "_view_name": "ProgressView",
1493 | "bar_style": "success",
1494 | "description": "",
1495 | "description_tooltip": null,
1496 | "layout": "IPY_MODEL_9613736bf85c44338d6f5609e6efc90b",
1497 | "max": 1,
1498 | "min": 0,
1499 | "orientation": "horizontal",
1500 | "style": "IPY_MODEL_dbe66bad8ba94b30b93c7c9d25cf8fbd",
1501 | "value": 1
1502 | }
1503 | },
1504 | "dbe66bad8ba94b30b93c7c9d25cf8fbd": {
1505 | "model_module": "@jupyter-widgets/controls",
1506 | "model_module_version": "1.5.0",
1507 | "model_name": "ProgressStyleModel",
1508 | "state": {
1509 | "_model_module": "@jupyter-widgets/controls",
1510 | "_model_module_version": "1.5.0",
1511 | "_model_name": "ProgressStyleModel",
1512 | "_view_count": null,
1513 | "_view_module": "@jupyter-widgets/base",
1514 | "_view_module_version": "1.2.0",
1515 | "_view_name": "StyleView",
1516 | "bar_color": null,
1517 | "description_width": ""
1518 | }
1519 | },
1520 | "e3c232f427c649f2bf3f58ed97ad15d1": {
1521 | "model_module": "@jupyter-widgets/controls",
1522 | "model_module_version": "1.5.0",
1523 | "model_name": "ProgressStyleModel",
1524 | "state": {
1525 | "_model_module": "@jupyter-widgets/controls",
1526 | "_model_module_version": "1.5.0",
1527 | "_model_name": "ProgressStyleModel",
1528 | "_view_count": null,
1529 | "_view_module": "@jupyter-widgets/base",
1530 | "_view_module_version": "1.2.0",
1531 | "_view_name": "StyleView",
1532 | "bar_color": null,
1533 | "description_width": ""
1534 | }
1535 | },
1536 | "ecefe53c759e4a54aa1f3ed44444fdf5": {
1537 | "model_module": "@jupyter-widgets/base",
1538 | "model_module_version": "1.2.0",
1539 | "model_name": "LayoutModel",
1540 | "state": {
1541 | "_model_module": "@jupyter-widgets/base",
1542 | "_model_module_version": "1.2.0",
1543 | "_model_name": "LayoutModel",
1544 | "_view_count": null,
1545 | "_view_module": "@jupyter-widgets/base",
1546 | "_view_module_version": "1.2.0",
1547 | "_view_name": "LayoutView",
1548 | "align_content": null,
1549 | "align_items": null,
1550 | "align_self": null,
1551 | "border": null,
1552 | "bottom": null,
1553 | "display": null,
1554 | "flex": null,
1555 | "flex_flow": null,
1556 | "grid_area": null,
1557 | "grid_auto_columns": null,
1558 | "grid_auto_flow": null,
1559 | "grid_auto_rows": null,
1560 | "grid_column": null,
1561 | "grid_gap": null,
1562 | "grid_row": null,
1563 | "grid_template_areas": null,
1564 | "grid_template_columns": null,
1565 | "grid_template_rows": null,
1566 | "height": null,
1567 | "justify_content": null,
1568 | "justify_items": null,
1569 | "left": null,
1570 | "margin": null,
1571 | "max_height": null,
1572 | "max_width": null,
1573 | "min_height": null,
1574 | "min_width": null,
1575 | "object_fit": null,
1576 | "object_position": null,
1577 | "order": null,
1578 | "overflow": null,
1579 | "overflow_x": null,
1580 | "overflow_y": null,
1581 | "padding": null,
1582 | "right": null,
1583 | "top": null,
1584 | "visibility": null,
1585 | "width": null
1586 | }
1587 | },
1588 | "fb0e11738e734dc6a7428c4a82e81705": {
1589 | "model_module": "@jupyter-widgets/controls",
1590 | "model_module_version": "1.5.0",
1591 | "model_name": "HBoxModel",
1592 | "state": {
1593 | "_dom_classes": [],
1594 | "_model_module": "@jupyter-widgets/controls",
1595 | "_model_module_version": "1.5.0",
1596 | "_model_name": "HBoxModel",
1597 | "_view_count": null,
1598 | "_view_module": "@jupyter-widgets/controls",
1599 | "_view_module_version": "1.5.0",
1600 | "_view_name": "HBoxView",
1601 | "box_style": "",
1602 | "children": [
1603 | "IPY_MODEL_a327863b15a54d1eb8cd2c26ee31c826",
1604 | "IPY_MODEL_d46fa732fb474fce8f940c031ca06637",
1605 | "IPY_MODEL_fcf94990ac4f46b2a0fd54e06eb0f88b"
1606 | ],
1607 | "layout": "IPY_MODEL_4f7c45939d3f49eb856ccc58f999e37b"
1608 | }
1609 | },
1610 | "fcf94990ac4f46b2a0fd54e06eb0f88b": {
1611 | "model_module": "@jupyter-widgets/controls",
1612 | "model_module_version": "1.5.0",
1613 | "model_name": "HTMLModel",
1614 | "state": {
1615 | "_dom_classes": [],
1616 | "_model_module": "@jupyter-widgets/controls",
1617 | "_model_module_version": "1.5.0",
1618 | "_model_name": "HTMLModel",
1619 | "_view_count": null,
1620 | "_view_module": "@jupyter-widgets/controls",
1621 | "_view_module_version": "1.5.0",
1622 | "_view_name": "HTMLView",
1623 | "description": "",
1624 | "description_tooltip": null,
1625 | "layout": "IPY_MODEL_ecefe53c759e4a54aa1f3ed44444fdf5",
1626 | "placeholder": "",
1627 | "style": "IPY_MODEL_385a10a4ca644ca38db0f539be5a54d5",
1628 | "value": " 1/1 [00:15<00:00, 12.66s/ url]"
1629 | }
1630 | },
1631 | "fd5460933db44c92b60e41b065551215": {
1632 | "model_module": "@jupyter-widgets/base",
1633 | "model_module_version": "1.2.0",
1634 | "model_name": "LayoutModel",
1635 | "state": {
1636 | "_model_module": "@jupyter-widgets/base",
1637 | "_model_module_version": "1.2.0",
1638 | "_model_name": "LayoutModel",
1639 | "_view_count": null,
1640 | "_view_module": "@jupyter-widgets/base",
1641 | "_view_module_version": "1.2.0",
1642 | "_view_name": "LayoutView",
1643 | "align_content": null,
1644 | "align_items": null,
1645 | "align_self": null,
1646 | "border": null,
1647 | "bottom": null,
1648 | "display": null,
1649 | "flex": null,
1650 | "flex_flow": null,
1651 | "grid_area": null,
1652 | "grid_auto_columns": null,
1653 | "grid_auto_flow": null,
1654 | "grid_auto_rows": null,
1655 | "grid_column": null,
1656 | "grid_gap": null,
1657 | "grid_row": null,
1658 | "grid_template_areas": null,
1659 | "grid_template_columns": null,
1660 | "grid_template_rows": null,
1661 | "height": null,
1662 | "justify_content": null,
1663 | "justify_items": null,
1664 | "left": null,
1665 | "margin": null,
1666 | "max_height": null,
1667 | "max_width": null,
1668 | "min_height": null,
1669 | "min_width": null,
1670 | "object_fit": null,
1671 | "object_position": null,
1672 | "order": null,
1673 | "overflow": null,
1674 | "overflow_x": null,
1675 | "overflow_y": null,
1676 | "padding": null,
1677 | "right": null,
1678 | "top": null,
1679 | "visibility": null,
1680 | "width": null
1681 | }
1682 | },
1683 | "fd5c76c7cda94b22814409cbda62a38d": {
1684 | "model_module": "@jupyter-widgets/base",
1685 | "model_module_version": "1.2.0",
1686 | "model_name": "LayoutModel",
1687 | "state": {
1688 | "_model_module": "@jupyter-widgets/base",
1689 | "_model_module_version": "1.2.0",
1690 | "_model_name": "LayoutModel",
1691 | "_view_count": null,
1692 | "_view_module": "@jupyter-widgets/base",
1693 | "_view_module_version": "1.2.0",
1694 | "_view_name": "LayoutView",
1695 | "align_content": null,
1696 | "align_items": null,
1697 | "align_self": null,
1698 | "border": null,
1699 | "bottom": null,
1700 | "display": null,
1701 | "flex": null,
1702 | "flex_flow": null,
1703 | "grid_area": null,
1704 | "grid_auto_columns": null,
1705 | "grid_auto_flow": null,
1706 | "grid_auto_rows": null,
1707 | "grid_column": null,
1708 | "grid_gap": null,
1709 | "grid_row": null,
1710 | "grid_template_areas": null,
1711 | "grid_template_columns": null,
1712 | "grid_template_rows": null,
1713 | "height": null,
1714 | "justify_content": null,
1715 | "justify_items": null,
1716 | "left": null,
1717 | "margin": null,
1718 | "max_height": null,
1719 | "max_width": null,
1720 | "min_height": null,
1721 | "min_width": null,
1722 | "object_fit": null,
1723 | "object_position": null,
1724 | "order": null,
1725 | "overflow": null,
1726 | "overflow_x": null,
1727 | "overflow_y": null,
1728 | "padding": null,
1729 | "right": null,
1730 | "top": null,
1731 | "visibility": null,
1732 | "width": null
1733 | }
1734 | }
1735 | }
1736 | }
1737 | },
1738 | "nbformat": 4,
1739 | "nbformat_minor": 1
1740 | }
1741 |
--------------------------------------------------------------------------------