├── README.md ├── setup.py ├── notpackage └── notfunctions.py ├── azure-pipelines.yml └── cicd-scripts └── installWhlLibrary.py /README.md: -------------------------------------------------------------------------------- 1 | # databricks-cicd-definitelynotademo -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='notademo', 4 | version='0.0.21', 5 | description='A sample PySpark application - 0.0.21', 6 | author='Silviu Tofan', 7 | author_email='silviu@databricks.com', 8 | url='www.databricks.com', 9 | packages=['notpackage'], 10 | zip_safe=False) -------------------------------------------------------------------------------- /notpackage/notfunctions.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql import SparkSession 2 | spark = SparkSession\ 3 | .builder\ 4 | .getOrCreate() 5 | 6 | def spark_f(n_rows): 7 | # The Spark code will execute on the Azure Databricks cluster... 8 | n = spark.range(n_rows).count() 9 | return n 10 | 11 | def python_f(n_rows): 12 | n = len(range(n_rows)) 13 | return n 14 | 15 | print("Hello, I'm testing my new pipeline!") 16 | -------------------------------------------------------------------------------- /azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | # Python package 2 | # Create and test a Python package on multiple Python versions. 3 | # Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: 4 | # https://docs.microsoft.com/azure/devops/pipelines/languages/python 5 | 6 | parameters: 7 | - name: dbrVersion 8 | displayName: 'DBR Version to use?' 
9 | type: string 10 | default: '6.6.*' 11 | 12 | # New code merged into the release branch initiates a build 13 | trigger: 14 | - release 15 | 16 | #Specify VM 17 | pool: 18 | vmImage: 'ubuntu-latest' 19 | 20 | # Install Python. The version must match the version on the Databricks cluster. 21 | steps: 22 | - task: UsePythonVersion@0 23 | displayName: 'Use Python 3.7' 24 | inputs: 25 | versionSpec: 3.7 26 | 27 | # Install required Python modules, including databricks-connect, required to execute a unit test 28 | # on a cluster. 29 | - script: | 30 | pip install pytest requests setuptools wheel 31 | pip install -U databricks-connect==${{ parameters.dbrVersion }} # we may not need databricks-connect 32 | pip install -U databricks-cli 33 | displayName: 'Load Python Dependencies' 34 | 35 | # Use environment variables to pass Databricks login information to the Databricks Connect 36 | # configuration function 37 | - script: | 38 | echo "y 39 | $(databricks_host) 40 | $(databricks_token) 41 | $(cluster_id) 42 | $(org_id) 43 | 15001" | databricks-connect configure 44 | displayName: 'Configure DBConnect' 45 | 46 | #Download code from designated branch to the agent 47 | - checkout: self 48 | persistCredentials: true 49 | clean: true 50 | 51 | - script: git checkout release 52 | displayName: 'Get Latest Branch' 53 | 54 | #Test goes here 55 | 56 | #Package code into a Py Wheel 57 | - script: | 58 | cd $(Build.Repository.LocalPath)/ 59 | python3 setup.py sdist bdist_wheel 60 | ls 61 | ls dist/ 62 | displayName: 'Build Python Wheel for Libs' 63 | 64 | # Use git diff to flag files added in the most recent git merge 65 | - script: | 66 | git diff --name-only --diff-filter=AMR HEAD^1 HEAD | xargs -I '{}' cp --parents -r '{}' $(Build.BinariesDirectory) 67 | mkdir -p $(Build.BinariesDirectory)/libraries/python/libs 68 | cp $(Build.Repository.LocalPath)/dist/*.* $(Build.BinariesDirectory)/libraries/python/libs 69 | mkdir -p $(Build.BinariesDirectory)/cicd-scripts 70 | cp 
$(Build.Repository.LocalPath)/cicd-scripts/*.* $(Build.BinariesDirectory)/cicd-scripts 71 | displayName: 'Get Changes' 72 | # Add the wheel file you just created 73 | # The objective is to add all files intended for the current release. 74 | 75 | # Create the deployment artifact and publish it to the artifact repository 76 | - task: ArchiveFiles@2 77 | inputs: 78 | rootFolderOrFile: '$(Build.BinariesDirectory)' 79 | includeRootFolder: false 80 | archiveType: 'zip' 81 | archiveFile: '$(Build.ArtifactStagingDirectory)/$(Build.BuildId).zip' 82 | replaceExistingArchive: true 83 | 84 | - script: | 85 | databricks fs mkdirs 'dbfs:/FileStore/artifacts/' 86 | databricks fs cp '$(Build.ArtifactStagingDirectory)/$(Build.BuildId).zip' 'dbfs:/FileStore/artifacts/' 87 | displayName: "Copy artifact to Databricks" 88 | 89 | - task: PublishBuildArtifacts@1 90 | inputs: 91 | ArtifactName: 'DatabricksBuild' 92 | -------------------------------------------------------------------------------- /cicd-scripts/installWhlLibrary.py: -------------------------------------------------------------------------------- 1 | # installWhlLibrary.py 2 | #!/usr/bin/python3 3 | import json 4 | import requests 5 | import sys 6 | import getopt 7 | import time 8 | import os 9 | 10 | def main(): 11 | shard = '' 12 | token = '' 13 | clusterid = '' 14 | libspath = '' 15 | dbfspath = '' 16 | 17 | try: 18 | opts, args = getopt.getopt(sys.argv[1:], 'hstcld', 19 | ['shard=', 'token=', 'clusterid=', 'libs=', 'dbfspath=']) 20 | except getopt.GetoptError: 21 | print( 22 | 'installWhlLibrary.py -s -t -c -l -d ') 23 | sys.exit(2) 24 | 25 | for opt, arg in opts: 26 | if opt == '-h': 27 | print( 28 | 'installWhlLibrary.py -s -t -c -l -d ') 29 | sys.exit() 30 | elif opt in ('-s', '--shard'): 31 | shard = arg 32 | elif opt in ('-t', '--token'): 33 | token = arg 34 | elif opt in ('-c', '--clusterid'): 35 | clusterid = arg 36 | elif opt in ('-l', '--libs'): 37 | libspath=arg 38 | elif opt in ('-d', '--dbfspath'): 39 | 
dbfspath=arg 40 | 41 | print('-s is ' + shard) 42 | print('-t is ' + token) 43 | print('-c is ' + clusterid) 44 | print('-l is ' + libspath) 45 | print('-d is ' + dbfspath) 46 | 47 | # Uninstall library if exists on cluster 48 | i=0 49 | 50 | # Generate array from walking local path 51 | libslist = [] 52 | for path, subdirs, files in os.walk(libspath): 53 | for name in files: 54 | 55 | name, file_extension = os.path.splitext(name) 56 | if file_extension.lower() in ['.whl']: 57 | libslist.append(name + file_extension.lower()) 58 | 59 | for lib in libslist: 60 | dbfslib = dbfspath + '/' + lib 61 | print(dbfslib + ' before:' + getLibStatus(shard, token, clusterid, dbfslib)) 62 | 63 | if (getLibStatus(shard, token, clusterid, dbfslib) != 'not found'): 64 | print(dbfslib + " exists. Uninstalling.") 65 | i = i + 1 66 | values = {'cluster_id': clusterid, 'libraries': [{'whl': dbfslib}]} 67 | 68 | resp = requests.post(shard + '/api/2.0/libraries/uninstall', data=json.dumps(values), auth=("token", token)) 69 | runjson = resp.text 70 | d = json.loads(runjson) 71 | print(dbfslib + ' after:' + getLibStatus(shard, token, clusterid, dbfslib)) 72 | 73 | # Restart if libraries uninstalled 74 | if i > 0: 75 | values = {'cluster_id': clusterid} 76 | print("Restarting cluster:" + clusterid) 77 | resp = requests.post(shard + '/api/2.0/clusters/restart', data=json.dumps(values), auth=("token", token)) 78 | restartjson = resp.text 79 | print(restartjson) 80 | 81 | p = 0 82 | waiting = True 83 | while waiting: 84 | time.sleep(30) 85 | clusterresp = requests.get(shard + '/api/2.0/clusters/get?cluster_id=' + clusterid, 86 | auth=("token", token)) 87 | clusterjson = clusterresp.text 88 | jsonout = json.loads(clusterjson) 89 | current_state = jsonout['state'] 90 | print(clusterid + " state:" + current_state) 91 | if current_state in ['TERMINATED', 'RUNNING','INTERNAL_ERROR', 'SKIPPED'] or p >= 10: 92 | break 93 | p = p + 1 94 | 95 | print("Installing " + dbfslib) 96 | values = {'cluster_id': 
clusterid, 'libraries': [{'whl': 'dbfs:' + dbfslib}]} 97 | 98 | resp = requests.post(shard + '/api/2.0/libraries/install', data=json.dumps(values), auth=("token", token)) 99 | runjson = resp.text 100 | d = json.loads(runjson) 101 | print(dbfslib + ' after:' + getLibStatus(shard, token, clusterid, dbfslib)) 102 | 103 | def getLibStatus(shard, token, clusterid, dbfslib): 104 | 105 | resp = requests.get(shard + '/api/2.0/libraries/cluster-status?cluster_id='+ clusterid, auth=("token", token)) 106 | libjson = resp.text 107 | d = json.loads(libjson) 108 | if (d.get('library_statuses')): 109 | statuses = d['library_statuses'] 110 | 111 | for status in statuses: 112 | if (status['library'].get('whl')): 113 | if (status['library']['whl'] == 'dbfs:' + dbfslib): 114 | return status['status'] 115 | else: 116 | return "not found" 117 | else: 118 | # No libraries found 119 | return "not found" 120 | 121 | if __name__ == '__main__': 122 | main() --------------------------------------------------------------------------------