# Repository layout (this file is a flattened dump of the repository):
#
#   ├── Linux - getTweets.py
#   ├── Linux- ADFPythonCustomActivityHandsonLab.docx
#   └── README.md
#
# --------------------------------------------------------------------------------
# /Linux - getTweets.py:
# --------------------------------------------------------------------------------
"""Fetch tweets matching a search tag and store them in Azure Cosmos DB.

The Twitter API credentials and the Cosmos DB URI / master key are read from
Azure Key Vault; the Key Vault itself is accessed with a service principal.

Usage::

    getTweets.py <tweetTag> <tweetReadSince> [pipelineId]

* ``tweetTag``       -- search term, e.g. ``Azure``
* ``tweetReadSince`` -- date from which tweets are read, e.g. ``2018/07/28``
* ``pipelineId``     -- Azure Data Factory pipeline id, e.g. ``testrun``
  (accepted for compatibility with existing invocations; not used)
"""
import os
import sys

import tweepy
import pydocumentdb.documents as documents
import pydocumentdb.document_client as document_client
import pydocumentdb.errors as errors

from azure.keyvault import KeyVaultClient, KeyVaultAuthentication
from azure.common.credentials import ServicePrincipalCredentials


# Base URL of the Key Vault that holds every secret read by main().
_KEYVAULT_URL = "https://sudhirawkeyvault.vault.azure.net/"

# HTTP status code treated as "document already exists" and silently skipped.
_HTTP_CONFLICT = 409


class IDisposable:
    """A context manager to automatically close an object with a close method
    in a with statement.

    The wrapped object is bound to the ``as`` target.  On exit its ``close()``
    method is called if it has one; objects without ``close`` are left alone.
    Exceptions raised inside the ``with`` body are not suppressed.
    """

    def __init__(self, obj):
        self.obj = obj

    def __enter__(self):
        return self.obj  # bound to target

    def __exit__(self, exception_type, exception_val, trace):
        # The original body only rebound the local name ``self`` to None,
        # which released nothing.  Actually close the wrapped object here.
        close = getattr(self.obj, 'close', None)
        if callable(close):
            close()


# Module-level placeholder kept from the original file.  auth_callback()
# assigns its own *local* variable of the same name and never touches this one.
credentials = None


def auth_callback(server, resource, scope):
    """Return ``(token_type, access_token)`` for authenticating to Key Vault.

    Passed to ``KeyVaultAuthentication`` in main().  The three parameters are
    supplied by the caller of the callback and are not used here: the token is
    always requested for the ``https://vault.azure.net`` resource.

    The service principal is taken from the ``AZURE_CLIENT_ID``,
    ``AZURE_CLIENT_SECRET`` and ``AZURE_TENANT_ID`` environment variables when
    they are set, falling back to the values that were previously hard-coded.
    """
    # SECURITY NOTE(review): the fallback literals below are real-looking
    # credentials committed to source control.  They are kept only so existing
    # deployments keep working; rotate them and supply the values through the
    # environment instead, then remove the fallbacks.
    credentials = ServicePrincipalCredentials(
        # Azure AD APP Application ID
        client_id=os.environ.get(
            'AZURE_CLIENT_ID', 'a2fb091f-6d12-4d53-8530-5ae8f9657ef4'),
        # Secret
        secret=os.environ.get(
            'AZURE_CLIENT_SECRET', 'cCxlLGcHn6slHE5EMDeG/LV1cbHcU13SqpJQGr9UPd0='),
        # Azure AD Directory ID
        tenant=os.environ.get(
            'AZURE_TENANT_ID', '72f988bf-86f1-41af-91ab-2d7cd011db47'),
        resource="https://vault.azure.net",
    )
    token = credentials.token
    return token['token_type'], token['access_token']


def insertintoCosmosDB(cdbhost, cdbmasterkey, tweetDate, tweetText):
    """Insert one tweet as a document into ``dbs/tweetdb/colls/tweetcollec``.

    Args:
        cdbhost: Cosmos DB account URI.
        cdbmasterkey: Cosmos DB master key.
        tweetDate: Creation time of the tweet; stored as ``str(tweetDate)``.
        tweetText: Tweet text.  UTF-8 encoded bytes are accepted and decoded,
            because older callers passed ``text.encode('utf-8')``.

    Raises:
        errors.DocumentDBError: Any Cosmos DB error other than a 409 conflict.
            The original exception is re-raised unchanged.
    """
    # Bytes cannot be serialized into the JSON document on Python 3, so
    # normalize to text at this boundary.
    if isinstance(tweetText, bytes):
        tweetText = tweetText.decode('utf-8')

    tweetmessage = {
        'tweetDate': str(tweetDate),
        # NOTE(review): the id is only the first whitespace-separated token of
        # the timestamp (the date, for "YYYY-MM-DD HH:MM:SS" style values), so
        # every tweet from the same day gets the same id and all but the first
        # are skipped as conflicts below.  Left unchanged because altering the
        # id scheme changes the stored data -- confirm whether this is intended.
        'id': str(tweetDate).split()[0],
        'tweetText': tweetText,
    }
    _database_link = 'dbs/tweetdb'
    _collection_link = _database_link + '/colls/tweetcollec'
    print(tweetmessage)
    client = document_client.DocumentClient(cdbhost, {'masterKey': cdbmasterkey})
    try:
        client.CreateDocument(_collection_link, tweetmessage)
    except errors.DocumentDBError as e:
        # Not every DocumentDBError is guaranteed to carry a status code, so
        # read it defensively instead of risking an AttributeError here.
        if getattr(e, 'status_code', None) == _HTTP_CONFLICT:
            return
        # Re-raise the original error so its message and details are kept
        # (previously a fresh HTTPFailure with only the status code was raised).
        raise


def _get_secret_value(client, name, version):
    """Return the value of Key Vault secret *name* at *version*."""
    return client.get_secret(_KEYVAULT_URL, name, version).value


def main():
    """Read tweets for the tag given on the command line into Cosmos DB.

    Reads up to 15 English tweets matching ``sys.argv[1]`` created since
    ``sys.argv[2]`` and inserts each non-empty one via insertintoCosmosDB().
    Exits with a usage message when the two required arguments are missing.
    """
    # Validate the command line before making any network call.
    if len(sys.argv) < 3:
        sys.exit("usage: %s <tweetTag> <tweetReadSince> [pipelineId]" % sys.argv[0])
    _tweetTag = sys.argv[1]  # like Azure
    _tweetReadSince = sys.argv[2]  # date from when you want to read tweets like '2018/07/28'
    # sys.argv[3], the Azure Data Factory pipeline id (e.g. 'testrun'), is
    # accepted but not used anywhere in this script.

    client = KeyVaultClient(KeyVaultAuthentication(auth_callback))

    # Twitter application key (secret name, secret version)
    _appkey = _get_secret_value(
        client, "TwitterAPIKey", "06efd3325eec4ffeb0049ee9356a635e")
    _appsecret = _get_secret_value(
        client, "TwitterAPISecretKey", "e5f3e146a3764fd4ac8d72f4980fbe9c")
    _appaccesstoken = _get_secret_value(
        client, "TwitterAccessToken", "8e6a5e8a958d4bd495416d243ce83529")
    _appaccesstokensecret = _get_secret_value(
        client, "TwitterAccessTokenSecret", "426b8feaea3b48d9be4aa2453c6f1a3f")

    # CosmosDB Credential (secret name, secret version)
    _cdbhost = _get_secret_value(
        client, "cosmosdbURI", "69554523206d4768b1d096d3450d5238")
    _cdbmasterkey = _get_secret_value(
        client, "cosmosdbPK", "7c24170c75544fc0b1aa84f3a82a9c70")

    auth = tweepy.OAuthHandler(_appkey, _appsecret)
    auth.set_access_token(_appaccesstoken, _appaccesstokensecret)
    tweetapi = tweepy.API(auth, wait_on_rate_limit=True)

    tweets = tweepy.Cursor(
        tweetapi.search, q=_tweetTag, lang="en", since=_tweetReadSince).items(15)
    for tweet in tweets:
        # The original compared text.encode('utf-8') with '', which is always
        # unequal on Python 3 (bytes vs str); test the text itself instead.
        if not tweet.text:
            continue
        try:
            insertintoCosmosDB(_cdbhost, _cdbmasterkey, tweet.created_at, tweet.text)
        except errors.DocumentDBError as e:
            if getattr(e, 'status_code', None) == _HTTP_CONFLICT:
                continue
            # Report before re-raising; previously this message sat after the
            # raise and could never be printed for a real failure.
            print("Error while fetching and storing tweets!!!")
            raise


if __name__ == "__main__":
    main()

# --------------------------------------------------------------------------------
# /Linux- ADFPythonCustomActivityHandsonLab.docx:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/rawatsudhir1/ADFPythonCustomActivity/8afc1ddafb73ce85949461c54b1192d388f9bf89/Linux- ADFPythonCustomActivityHandsonLab.docx
#
# --------------------------------------------------------------------------------
# /README.md:
# --------------------------------------------------------------------------------
# # Scheduling an ADF Pipeline to execute Python code using ADF Custom Activity
# This repository consists of a hands-on lab and Python code. The hands-on lab
# describes how to schedule Python code in Azure Data Factory. If you want to use
#
# **Linux**
#
# - HOL:- Linux- ADFPythonCustomActivityHandsonLab.docx
#
# - CodeFile:- Linux - getTweets.py (remove "Linux -" when using the file for a demo)
#
# If you want to contribute please feel free to do so.
# --------------------------------------------------------------------------------