├── DataProfileCheck.py ├── Single-GlueJob-Multiple-Transformation.zip ├── aws-kinesis-redshift-integration.txt ├── concurrentjobs.txt ├── datasourcecode.py ├── datasql.sql ├── glue-etl-pipeline-dataquality-check.txt ├── glue-external-libraries.zip ├── glue_jdbc_parallel_read.ipynb ├── gluejobtransactioncode.py ├── jobcode.py ├── lakeformationtransactioncode.py ├── multi-file-ingestion-code.zip ├── multiple-table-ingestion-pipeline.md ├── pipelinehandler.py ├── pushdownpredicate.ipynb ├── redshift-data-api-etl.zip ├── redshift_sql_workflow_step_functions.zip ├── sensordata.csv └── stepfunctionsetl.py /DataProfileCheck.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import time 3 | import json 4 | 5 | profilejobname = 'salesdataprofilejob' 6 | client = boto3.client('databrew') 7 | 8 | response = client.start_job_run(Name=profilejobname) 9 | runid = response["RunId"] 10 | print(runid) 11 | 12 | response = client.describe_job_run( 13 | Name=profilejobname, 14 | RunId=runid 15 | ) 16 | 17 | status = response["State"] 18 | 19 | while status not in ("SUCCEEDED", "FAILED", "STOPPED", "TIMEOUT"): 20 | time.sleep(10) 21 | response = client.describe_job_run(Name=profilejobname, RunId=runid) 22 | status = response["State"] 23 | print("status - " + status + " job in progress...") 24 | if status in ("FAILED", "STOPPED", "TIMEOUT"): raise RuntimeError("profile job " + profilejobname + " ended with status " + status) 25 | 26 | bucketname = "" 27 | filename = "" 28 | for o in response["Outputs"]: 29 | bucketname = o["Location"]["Bucket"] 30 | if "dq-validation" in o["Location"]["Key"]: 31 | filename = o["Location"]["Key"] 32 | print(bucketname) 33 | print(filename) 34 | 35 | s3 = boto3.resource('s3') 36 | 37 | content_object = s3.Object(bucketname, filename) 38 | file_content = content_object.get()['Body'].read().decode('utf-8') 39 | profilejson = json.loads(file_content) 40 | 41 | for rs in profilejson["rulesetResults"]: 42 | print ('The %s evaluation is %s with the following details:' % (rs["name"], rs["status"])) 43 | for r in rs["ruleResults"]: 44 | print ('%s status = %s' % (r["name"], 
r["status"])) 45 | -------------------------------------------------------------------------------- /Single-GlueJob-Multiple-Transformation.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-dojo/analytics/7bf14ff1faaba9c4e3f9e9706a8765c272b01208/Single-GlueJob-Multiple-Transformation.zip -------------------------------------------------------------------------------- /aws-kinesis-redshift-integration.txt: -------------------------------------------------------------------------------- 1 | --- IAM Role for Redshift ---- 2 | { 3 | "Version": "2012-10-17", 4 | "Statement": [ 5 | { 6 | "Sid": "ReadStream", 7 | "Effect": "Allow", 8 | "Action": [ 9 | "kinesis:DescribeStreamSummary", 10 | "kinesis:GetShardIterator", 11 | "kinesis:GetRecords", 12 | "kinesis:DescribeStream" 13 | ], 14 | "Resource": "*" 15 | }, 16 | { 17 | "Sid": "ListStream", 18 | "Effect": "Allow", 19 | "Action": [ 20 | "kinesis:ListStreams", 21 | "kinesis:ListShards" 22 | ], 23 | "Resource": "*" 24 | } 25 | ] 26 | } 27 | 28 | --- external schema for kinesis --- 29 | 30 | CREATE EXTERNAL SCHEMA <schema-name> 31 | FROM KINESIS 32 | IAM_ROLE 'arn:aws:iam::123456789012:role/<role-name>'; 33 | 34 | ---- create materialized view ---- 35 | 36 | CREATE MATERIALIZED VIEW <view-name> AS 37 | SELECT approximatearrivaltimestamp, 38 | partitionkey, 39 | shardid, 40 | sequencenumber, 41 | json_parse(from_varbyte(data, 'utf-8')) as payload 42 | FROM <schema-name>."<stream-name>"; 43 | 44 | ---- refresh view ---- 45 | 46 | REFRESH MATERIALIZED VIEW <view-name>; 47 | 48 | --- select data from view ---- 49 | 50 | select * from <view-name> 51 | 52 | ---- use notebook code to ingest data ---- 53 | 54 | import boto3 55 | import random 56 | 57 | client = boto3.client('kinesis') 58 | 59 | for x in range(1, 6): 60 | v = x * random.randint(1, 4) 61 | t = x * random.randint(1, 3) 62 | p = x * random.randint(4,7) 63 | mydata = '{ "vibration": ' + str(v) + ', "temperature": ' + str(t) + ', "pressure": ' + str(p) + '}' 64 | partitionkey = 
random.randint(10, 100); 65 | response = client.put_record(StreamName='<stream-name>', Data=mydata, PartitionKey=str(partitionkey)) 66 | 67 | print("Ingestion Done") 68 | 69 | --- ingested data sample ---- 70 | 71 | { 72 | "vibration": 10, 73 | "temperature": 33, 74 | "pressure": 41 75 | } 76 | 77 | 78 | -------------------------------------------------------------------------------- /concurrentjobs.txt: -------------------------------------------------------------------------------- 1 | ========== ingestionjob code =========== 2 | import sys 3 | from awsglue.transforms import * 4 | from awsglue.utils import getResolvedOptions 5 | from pyspark.context import SparkContext 6 | from awsglue.context import GlueContext 7 | from awsglue.job import Job 8 | 9 | args = getResolvedOptions(sys.argv, ['JOB_NAME', 'tablename', 'destination']) 10 | 11 | glueContext = GlueContext(SparkContext.getOrCreate()) 12 | 13 | customerDF = glueContext.create_dynamic_frame.from_catalog( 14 | database="dojodb", 15 | table_name=args['tablename'], redshift_tmp_dir="s3://dojo-dataset/scripts/") 16 | 17 | glueContext.write_dynamic_frame.from_options(customerDF, connection_type = "s3", connection_options = {"path": args['destination']}, format = "csv") 18 | 19 | ==== CLI to Run Jobs ==== 20 | 21 | aws glue start-job-run --job-name ingestionjob --arguments '{"--tablename":"postgres_public_customers","--destination":"s3://dojo-dataset/customers"}' 22 | 23 | aws glue start-job-run --job-name ingestionjob --arguments '{"--tablename":"postgres_public_employees","--destination":"s3://dojo-dataset/employees"}' -------------------------------------------------------------------------------- /datasourcecode.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from awsglue.transforms import * 3 | from awsglue.utils import getResolvedOptions 4 | from pyspark.context import SparkContext 5 | from awsglue.context import GlueContext 6 | from awsglue.job import Job 7 | 8 | glueContext 
= GlueContext(SparkContext.getOrCreate()) 9 | 10 | df = glueContext.create_dynamic_frame.from_catalog( 11 | database="dojodb", 12 | table_name="customers") 13 | df.printSchema() 14 | 15 | df = SelectFields.apply(df,["contactfirstname","contactlastname"]) 16 | df.printSchema() 17 | 18 | glueContext.write_dynamic_frame.from_options(df, connection_type = "s3", connection_options = {"path": "s3://dojo-dataset/output/"}, format = "csv") 19 | 20 | df = glueContext.create_dynamic_frame.from_catalog( 21 | database="dojodb", 22 | table_name="dev_public_users", redshift_tmp_dir="s3://dojo-dataset/temp/") 23 | df.printSchema() 24 | 25 | df = SelectFields.apply(df,["username","firstname","lastname"]) 26 | df.printSchema() 27 | 28 | glueContext.write_dynamic_frame.from_jdbc_conf(df, catalog_connection = "redshiftconnection", 29 | connection_options = {"dbtable": "usermini", "database": "dev"}, 30 | redshift_tmp_dir = "s3://dojo-dataset/temp/") 31 | 32 | df = glueContext.create_dynamic_frame.from_catalog( 33 | database="dojodb", 34 | table_name="postgres_public_employees", redshift_tmp_dir="s3://dojo-dataset/temp/") 35 | df.printSchema() 36 | 37 | df = SelectFields.apply(df,["firstname","lastname"]) 38 | df.printSchema() 39 | 40 | glueContext.write_dynamic_frame.from_jdbc_conf(df, catalog_connection = "rdsconnection", 41 | connection_options = {"dbtable": "employeesmini", "database": "postgres"}, 42 | redshift_tmp_dir = "s3://dojo-dataset/temp/") 43 | 44 | glueContext.write_dynamic_frame.from_options(df, connection_type = "s3", connection_options = {"path": "s3://dojo-dataset/rdsoutput/"}, format = "csv") 45 | 46 | 47 | -------------------------------------------------------------------------------- /datasql.sql: -------------------------------------------------------------------------------- 1 | create table books ( 2 | bookid int, 3 | bookname varchar, 4 | publisher varchar, 5 | author varchar, 6 | publishyear int, 7 | publishmonth int 8 | ); 9 | 10 | insert into books values 
(1,'bookname1','publsiher1','author1',2004,8), (2,'bookname2','publsiher2','author2',2007,2), (3,'bookname3','publsiher3','author3',2001,31), (4,'bookname4','publsiher4','author4',2003,29), (5,'bookname5','publsiher5','author5',2010,27), (6,'bookname6','publsiher6','author6',2003,8), (7,'bookname7','publsiher7','author7',2008,6), (8,'bookname8','publsiher8','author8',2001,22), (9,'bookname9','publsiher9','author9',2001,3), (10,'bookname10','publsiher10','author10',2005,8), (11,'bookname11','publsiher11','author11',2001,16), (12,'bookname12','publsiher12','author12',2007,5), (13,'bookname13','publsiher13','author13',2009,10), (14,'bookname14','publsiher14','author14',2005,19), (15,'bookname15','publsiher15','author15',2008,1), (16,'bookname16','publsiher16','author16',2008,23), (17,'bookname17','publsiher17','author17',2010,23), (18,'bookname18','publsiher18','author18',2006,11), (19,'bookname19','publsiher19','author19',2006,24), (20,'bookname20','publsiher20','author20',2004,21), (21,'bookname21','publsiher21','author21',2006,6), (22,'bookname22','publsiher22','author22',2010,23), (23,'bookname23','publsiher23','author23',2005,17), (24,'bookname24','publsiher24','author24',2005,1), (25,'bookname25','publsiher25','author25',2002,23), (26,'bookname26','publsiher26','author26',2002,22), (27,'bookname27','publsiher27','author27',2009,15), (28,'bookname28','publsiher28','author28',2009,5), (29,'bookname29','publsiher29','author29',2008,23), (30,'bookname30','publsiher30','author30',2002,14), (31,'bookname31','publsiher31','author31',2003,20), (32,'bookname32','publsiher32','author32',2008,19), (33,'bookname33','publsiher33','author33',2010,13), (34,'bookname34','publsiher34','author34',2008,11), (35,'bookname35','publsiher35','author35',2002,21), (36,'bookname36','publsiher36','author36',2005,2), (37,'bookname37','publsiher37','author37',2005,1), (38,'bookname38','publsiher38','author38',2009,16), (39,'bookname39','publsiher39','author39',2001,22), 
(40,'bookname40','publsiher40','author40',2010,15), (41,'bookname41','publsiher41','author41',2010,18), (42,'bookname42','publsiher42','author42',2004,18), (43,'bookname43','publsiher43','author43',2004,16), (44,'bookname44','publsiher44','author44',2010,6), (45,'bookname45','publsiher45','author45',2008,20), (46,'bookname46','publsiher46','author46',2002,19), (47,'bookname47','publsiher47','author47',2005,20), (48,'bookname48','publsiher48','author48',2003,14), (49,'bookname49','publsiher49','author49',2007,19), (50,'bookname50','publsiher50','author50',2005,31), (51,'bookname51','publsiher51','author51',2010,7), (52,'bookname52','publsiher52','author52',2007,30), (53,'bookname53','publsiher53','author53',2003,13), (54,'bookname54','publsiher54','author54',2005,15), (55,'bookname55','publsiher55','author55',2004,4), (56,'bookname56','publsiher56','author56',2009,14), (57,'bookname57','publsiher57','author57',2003,8), (58,'bookname58','publsiher58','author58',2006,24), (59,'bookname59','publsiher59','author59',2001,17), (60,'bookname60','publsiher60','author60',2010,30), (61,'bookname61','publsiher61','author61',2008,12), (62,'bookname62','publsiher62','author62',2003,18), (63,'bookname63','publsiher63','author63',2003,2), (64,'bookname64','publsiher64','author64',2005,25), (65,'bookname65','publsiher65','author65',2006,7), (66,'bookname66','publsiher66','author66',2003,6), (67,'bookname67','publsiher67','author67',2008,17), (68,'bookname68','publsiher68','author68',2009,28), (69,'bookname69','publsiher69','author69',2004,23), (70,'bookname70','publsiher70','author70',2007,31), (71,'bookname71','publsiher71','author71',2009,21), (72,'bookname72','publsiher72','author72',2006,23), (73,'bookname73','publsiher73','author73',2005,10), (74,'bookname74','publsiher74','author74',2003,18), (75,'bookname75','publsiher75','author75',2004,14), (76,'bookname76','publsiher76','author76',2002,25), (77,'bookname77','publsiher77','author77',2004,18), 
(78,'bookname78','publsiher78','author78',2008,16), (79,'bookname79','publsiher79','author79',2007,23), (80,'bookname80','publsiher80','author80',2002,31), (81,'bookname81','publsiher81','author81',2009,1), (82,'bookname82','publsiher82','author82',2007,24), (83,'bookname83','publsiher83','author83',2005,11), (84,'bookname84','publsiher84','author84',2006,23), (85,'bookname85','publsiher85','author85',2008,25), (86,'bookname86','publsiher86','author86',2001,1), (87,'bookname87','publsiher87','author87',2007,16), (88,'bookname88','publsiher88','author88',2010,10), (89,'bookname89','publsiher89','author89',2001,12), (90,'bookname90','publsiher90','author90',2006,26), (91,'bookname91','publsiher91','author91',2010,7), (92,'bookname92','publsiher92','author92',2001,3), (93,'bookname93','publsiher93','author93',2002,3), (94,'bookname94','publsiher94','author94',2001,31), (95,'bookname95','publsiher95','author95',2004,16), (96,'bookname96','publsiher96','author96',2004,11), (97,'bookname97','publsiher97','author97',2002,18), (98,'bookname98','publsiher98','author98',2009,10), (99,'bookname99','publsiher99','author99',2001,20), (100,'bookname100','publsiher100','author100',2002,16), (101,'bookname101','publsiher101','author101',2001,3), (102,'bookname102','publsiher102','author102',2010,21), (103,'bookname103','publsiher103','author103',2002,25), (104,'bookname104','publsiher104','author104',2006,25), (105,'bookname105','publsiher105','author105',2004,7), (106,'bookname106','publsiher106','author106',2008,6), (107,'bookname107','publsiher107','author107',2009,12), (108,'bookname108','publsiher108','author108',2005,19), (109,'bookname109','publsiher109','author109',2002,21), (110,'bookname110','publsiher110','author110',2005,4), (111,'bookname111','publsiher111','author111',2004,31), (112,'bookname112','publsiher112','author112',2008,20), (113,'bookname113','publsiher113','author113',2008,10), (114,'bookname114','publsiher114','author114',2009,10), 
(115,'bookname115','publsiher115','author115',2006,4), (116,'bookname116','publsiher116','author116',2006,27), (117,'bookname117','publsiher117','author117',2001,23), (118,'bookname118','publsiher118','author118',2007,29), (119,'bookname119','publsiher119','author119',2003,15), (120,'bookname120','publsiher120','author120',2006,10), (121,'bookname121','publsiher121','author121',2001,14), (122,'bookname122','publsiher122','author122',2004,2), (123,'bookname123','publsiher123','author123',2010,14), (124,'bookname124','publsiher124','author124',2009,13), (125,'bookname125','publsiher125','author125',2007,6), (126,'bookname126','publsiher126','author126',2003,15), (127,'bookname127','publsiher127','author127',2001,6), (128,'bookname128','publsiher128','author128',2001,11), (129,'bookname129','publsiher129','author129',2009,1), (130,'bookname130','publsiher130','author130',2010,29), (131,'bookname131','publsiher131','author131',2001,24), (132,'bookname132','publsiher132','author132',2004,8), (133,'bookname133','publsiher133','author133',2010,27), (134,'bookname134','publsiher134','author134',2005,17), (135,'bookname135','publsiher135','author135',2005,1), (136,'bookname136','publsiher136','author136',2007,2), (137,'bookname137','publsiher137','author137',2001,28), (138,'bookname138','publsiher138','author138',2010,12), (139,'bookname139','publsiher139','author139',2004,15), (140,'bookname140','publsiher140','author140',2004,21), (141,'bookname141','publsiher141','author141',2007,18), (142,'bookname142','publsiher142','author142',2004,2), (143,'bookname143','publsiher143','author143',2002,12), (144,'bookname144','publsiher144','author144',2007,13), (145,'bookname145','publsiher145','author145',2007,12), (146,'bookname146','publsiher146','author146',2003,28), (147,'bookname147','publsiher147','author147',2003,22), (148,'bookname148','publsiher148','author148',2007,26), (149,'bookname149','publsiher149','author149',2003,9), 
(150,'bookname150','publsiher150','author150',2009,22), (151,'bookname151','publsiher151','author151',2001,10), (152,'bookname152','publsiher152','author152',2006,22), (153,'bookname153','publsiher153','author153',2006,5), (154,'bookname154','publsiher154','author154',2003,28), (155,'bookname155','publsiher155','author155',2006,3), (156,'bookname156','publsiher156','author156',2006,18), (157,'bookname157','publsiher157','author157',2008,6), (158,'bookname158','publsiher158','author158',2003,28), (159,'bookname159','publsiher159','author159',2005,6), (160,'bookname160','publsiher160','author160',2004,25), (161,'bookname161','publsiher161','author161',2006,27), (162,'bookname162','publsiher162','author162',2009,29), (163,'bookname163','publsiher163','author163',2004,2), (164,'bookname164','publsiher164','author164',2008,27), (165,'bookname165','publsiher165','author165',2006,22), (166,'bookname166','publsiher166','author166',2005,2), (167,'bookname167','publsiher167','author167',2007,3), (168,'bookname168','publsiher168','author168',2002,8), (169,'bookname169','publsiher169','author169',2003,22), (170,'bookname170','publsiher170','author170',2001,14), (171,'bookname171','publsiher171','author171',2009,3), (172,'bookname172','publsiher172','author172',2007,12), (173,'bookname173','publsiher173','author173',2002,1), (174,'bookname174','publsiher174','author174',2001,24), (175,'bookname175','publsiher175','author175',2005,29), (176,'bookname176','publsiher176','author176',2005,23), (177,'bookname177','publsiher177','author177',2001,28), (178,'bookname178','publsiher178','author178',2007,28), (179,'bookname179','publsiher179','author179',2008,21), (180,'bookname180','publsiher180','author180',2001,18), (181,'bookname181','publsiher181','author181',2007,29), (182,'bookname182','publsiher182','author182',2003,13), (183,'bookname183','publsiher183','author183',2004,3), (184,'bookname184','publsiher184','author184',2002,30), 
(185,'bookname185','publsiher185','author185',2005,25), (186,'bookname186','publsiher186','author186',2002,31), (187,'bookname187','publsiher187','author187',2006,28), (188,'bookname188','publsiher188','author188',2005,29), (189,'bookname189','publsiher189','author189',2006,21), (190,'bookname190','publsiher190','author190',2004,1), (191,'bookname191','publsiher191','author191',2002,15), (192,'bookname192','publsiher192','author192',2002,24), (193,'bookname193','publsiher193','author193',2008,9), (194,'bookname194','publsiher194','author194',2005,5), (195,'bookname195','publsiher195','author195',2008,7), (196,'bookname196','publsiher196','author196',2001,7), (197,'bookname197','publsiher197','author197',2010,6), (198,'bookname198','publsiher198','author198',2008,14), (199,'bookname199','publsiher199','author199',2004,14), (200,'bookname200','publsiher200','author200',2004,28), (201,'bookname201','publsiher201','author201',2010,21), (202,'bookname202','publsiher202','author202',2010,16), (203,'bookname203','publsiher203','author203',2007,17), (204,'bookname204','publsiher204','author204',2003,22), (205,'bookname205','publsiher205','author205',2009,22), (206,'bookname206','publsiher206','author206',2004,26), (207,'bookname207','publsiher207','author207',2008,30), (208,'bookname208','publsiher208','author208',2007,17), (209,'bookname209','publsiher209','author209',2001,21), (210,'bookname210','publsiher210','author210',2004,16), (211,'bookname211','publsiher211','author211',2002,25), (212,'bookname212','publsiher212','author212',2004,17), (213,'bookname213','publsiher213','author213',2010,12), (214,'bookname214','publsiher214','author214',2010,2), (215,'bookname215','publsiher215','author215',2007,14), (216,'bookname216','publsiher216','author216',2007,31), (217,'bookname217','publsiher217','author217',2003,7), (218,'bookname218','publsiher218','author218',2009,1), (219,'bookname219','publsiher219','author219',2002,10), 
(220,'bookname220','publsiher220','author220',2006,17), (221,'bookname221','publsiher221','author221',2004,12), (222,'bookname222','publsiher222','author222',2001,19), (223,'bookname223','publsiher223','author223',2008,25), (224,'bookname224','publsiher224','author224',2004,8), (225,'bookname225','publsiher225','author225',2004,24), (226,'bookname226','publsiher226','author226',2007,24), (227,'bookname227','publsiher227','author227',2010,19), (228,'bookname228','publsiher228','author228',2005,18), (229,'bookname229','publsiher229','author229',2002,17), (230,'bookname230','publsiher230','author230',2003,17), (231,'bookname231','publsiher231','author231',2004,11), (232,'bookname232','publsiher232','author232',2009,30), (233,'bookname233','publsiher233','author233',2010,13), (234,'bookname234','publsiher234','author234',2005,17), (235,'bookname235','publsiher235','author235',2010,10), (236,'bookname236','publsiher236','author236',2006,18), (237,'bookname237','publsiher237','author237',2003,1), (238,'bookname238','publsiher238','author238',2008,8), (239,'bookname239','publsiher239','author239',2006,19), (240,'bookname240','publsiher240','author240',2002,3), (241,'bookname241','publsiher241','author241',2003,20), (242,'bookname242','publsiher242','author242',2003,24), (243,'bookname243','publsiher243','author243',2005,23), (244,'bookname244','publsiher244','author244',2001,29), (245,'bookname245','publsiher245','author245',2003,24), (246,'bookname246','publsiher246','author246',2009,8), (247,'bookname247','publsiher247','author247',2003,24), (248,'bookname248','publsiher248','author248',2001,18), (249,'bookname249','publsiher249','author249',2003,10), (250,'bookname250','publsiher250','author250',2001,3), (251,'bookname251','publsiher251','author251',2010,11), (252,'bookname252','publsiher252','author252',2006,5), (253,'bookname253','publsiher253','author253',2005,15), (254,'bookname254','publsiher254','author254',2002,7), 
(255,'bookname255','publsiher255','author255',2010,26), (256,'bookname256','publsiher256','author256',2002,17), (257,'bookname257','publsiher257','author257',2003,27), (258,'bookname258','publsiher258','author258',2004,20), (259,'bookname259','publsiher259','author259',2006,2), (260,'bookname260','publsiher260','author260',2009,5), (261,'bookname261','publsiher261','author261',2005,27), (262,'bookname262','publsiher262','author262',2001,17), (263,'bookname263','publsiher263','author263',2003,3), (264,'bookname264','publsiher264','author264',2001,27), (265,'bookname265','publsiher265','author265',2008,26), (266,'bookname266','publsiher266','author266',2010,21), (267,'bookname267','publsiher267','author267',2003,1), (268,'bookname268','publsiher268','author268',2005,1), (269,'bookname269','publsiher269','author269',2003,28), (270,'bookname270','publsiher270','author270',2004,3), (271,'bookname271','publsiher271','author271',2009,22), (272,'bookname272','publsiher272','author272',2005,2), (273,'bookname273','publsiher273','author273',2004,5), (274,'bookname274','publsiher274','author274',2002,29), (275,'bookname275','publsiher275','author275',2003,17), (276,'bookname276','publsiher276','author276',2006,17), (277,'bookname277','publsiher277','author277',2003,5), (278,'bookname278','publsiher278','author278',2009,26), (279,'bookname279','publsiher279','author279',2008,31), (280,'bookname280','publsiher280','author280',2007,26), (281,'bookname281','publsiher281','author281',2009,20), (282,'bookname282','publsiher282','author282',2004,11), (283,'bookname283','publsiher283','author283',2006,3), (284,'bookname284','publsiher284','author284',2004,13), (285,'bookname285','publsiher285','author285',2005,11), (286,'bookname286','publsiher286','author286',2005,13), (287,'bookname287','publsiher287','author287',2008,8), (288,'bookname288','publsiher288','author288',2005,19), (289,'bookname289','publsiher289','author289',2008,16), 
(290,'bookname290','publsiher290','author290',2010,26), (291,'bookname291','publsiher291','author291',2010,12), (292,'bookname292','publsiher292','author292',2001,28), (293,'bookname293','publsiher293','author293',2005,31), (294,'bookname294','publsiher294','author294',2004,16), (295,'bookname295','publsiher295','author295',2009,31), (296,'bookname296','publsiher296','author296',2009,3), (297,'bookname297','publsiher297','author297',2001,19), (298,'bookname298','publsiher298','author298',2006,22), (299,'bookname299','publsiher299','author299',2006,18), (300,'bookname300','publsiher300','author300',2010,22), (301,'bookname301','publsiher301','author301',2007,31), (302,'bookname302','publsiher302','author302',2007,30), (303,'bookname303','publsiher303','author303',2001,30), (304,'bookname304','publsiher304','author304',2009,31), (305,'bookname305','publsiher305','author305',2004,20), (306,'bookname306','publsiher306','author306',2009,13), (307,'bookname307','publsiher307','author307',2006,31), (308,'bookname308','publsiher308','author308',2009,26), (309,'bookname309','publsiher309','author309',2005,23), (310,'bookname310','publsiher310','author310',2008,4), (311,'bookname311','publsiher311','author311',2005,17), (312,'bookname312','publsiher312','author312',2005,19), (313,'bookname313','publsiher313','author313',2003,22), (314,'bookname314','publsiher314','author314',2001,9), (315,'bookname315','publsiher315','author315',2009,6), (316,'bookname316','publsiher316','author316',2009,3), (317,'bookname317','publsiher317','author317',2005,27), (318,'bookname318','publsiher318','author318',2001,7), (319,'bookname319','publsiher319','author319',2005,23), (320,'bookname320','publsiher320','author320',2001,10), (321,'bookname321','publsiher321','author321',2007,21), (322,'bookname322','publsiher322','author322',2003,23), (323,'bookname323','publsiher323','author323',2006,21), (324,'bookname324','publsiher324','author324',2001,14), 
(325,'bookname325','publsiher325','author325',2002,16), (326,'bookname326','publsiher326','author326',2005,7), (327,'bookname327','publsiher327','author327',2006,19), (328,'bookname328','publsiher328','author328',2006,24), (329,'bookname329','publsiher329','author329',2004,10), (330,'bookname330','publsiher330','author330',2008,7), (331,'bookname331','publsiher331','author331',2009,3), (332,'bookname332','publsiher332','author332',2003,5), (333,'bookname333','publsiher333','author333',2005,22), (334,'bookname334','publsiher334','author334',2002,19), (335,'bookname335','publsiher335','author335',2007,18), (336,'bookname336','publsiher336','author336',2003,13), (337,'bookname337','publsiher337','author337',2004,10), (338,'bookname338','publsiher338','author338',2008,11), (339,'bookname339','publsiher339','author339',2005,25), (340,'bookname340','publsiher340','author340',2005,12), (341,'bookname341','publsiher341','author341',2003,10), (342,'bookname342','publsiher342','author342',2002,24), (343,'bookname343','publsiher343','author343',2003,18), (344,'bookname344','publsiher344','author344',2009,13), (345,'bookname345','publsiher345','author345',2002,15), (346,'bookname346','publsiher346','author346',2004,12), (347,'bookname347','publsiher347','author347',2005,10), (348,'bookname348','publsiher348','author348',2002,3), (349,'bookname349','publsiher349','author349',2007,13), (350,'bookname350','publsiher350','author350',2007,12), (351,'bookname351','publsiher351','author351',2002,23), (352,'bookname352','publsiher352','author352',2005,21), (353,'bookname353','publsiher353','author353',2004,4), (354,'bookname354','publsiher354','author354',2010,10), (355,'bookname355','publsiher355','author355',2009,8), (356,'bookname356','publsiher356','author356',2004,5), (357,'bookname357','publsiher357','author357',2002,1), (358,'bookname358','publsiher358','author358',2001,31), (359,'bookname359','publsiher359','author359',2002,4), 
(360,'bookname360','publsiher360','author360',2007,2), (361,'bookname361','publsiher361','author361',2007,4), (362,'bookname362','publsiher362','author362',2004,16), (363,'bookname363','publsiher363','author363',2006,29), (364,'bookname364','publsiher364','author364',2007,9), (365,'bookname365','publsiher365','author365',2003,2), (366,'bookname366','publsiher366','author366',2007,13), (367,'bookname367','publsiher367','author367',2003,17), (368,'bookname368','publsiher368','author368',2010,25), (369,'bookname369','publsiher369','author369',2006,20), (370,'bookname370','publsiher370','author370',2008,21), (371,'bookname371','publsiher371','author371',2004,19), (372,'bookname372','publsiher372','author372',2008,6), (373,'bookname373','publsiher373','author373',2004,3), (374,'bookname374','publsiher374','author374',2009,24), (375,'bookname375','publsiher375','author375',2005,23), (376,'bookname376','publsiher376','author376',2010,3), (377,'bookname377','publsiher377','author377',2005,12), (378,'bookname378','publsiher378','author378',2007,26), (379,'bookname379','publsiher379','author379',2007,3), (380,'bookname380','publsiher380','author380',2006,24), (381,'bookname381','publsiher381','author381',2001,19), (382,'bookname382','publsiher382','author382',2010,20), (383,'bookname383','publsiher383','author383',2005,27), (384,'bookname384','publsiher384','author384',2001,11), (385,'bookname385','publsiher385','author385',2008,24), (386,'bookname386','publsiher386','author386',2009,20), (387,'bookname387','publsiher387','author387',2005,14), (388,'bookname388','publsiher388','author388',2005,5), (389,'bookname389','publsiher389','author389',2005,12), (390,'bookname390','publsiher390','author390',2001,19), (391,'bookname391','publsiher391','author391',2007,3), (392,'bookname392','publsiher392','author392',2010,22), (393,'bookname393','publsiher393','author393',2001,4), (394,'bookname394','publsiher394','author394',2004,29), 
(395,'bookname395','publsiher395','author395',2004,11), (396,'bookname396','publsiher396','author396',2002,14), (397,'bookname397','publsiher397','author397',2003,27), (398,'bookname398','publsiher398','author398',2009,15), (399,'bookname399','publsiher399','author399',2010,14), (400,'bookname400','publsiher400','author400',2008,17), (401,'bookname401','publsiher401','author401',2010,11), (402,'bookname402','publsiher402','author402',2002,25), (403,'bookname403','publsiher403','author403',2009,21), (404,'bookname404','publsiher404','author404',2010,8), (405,'bookname405','publsiher405','author405',2004,26), (406,'bookname406','publsiher406','author406',2002,6), (407,'bookname407','publsiher407','author407',2001,13), (408,'bookname408','publsiher408','author408',2005,14), (409,'bookname409','publsiher409','author409',2003,14), (410,'bookname410','publsiher410','author410',2010,9), (411,'bookname411','publsiher411','author411',2005,20), (412,'bookname412','publsiher412','author412',2008,1), (413,'bookname413','publsiher413','author413',2005,2), (414,'bookname414','publsiher414','author414',2006,18), (415,'bookname415','publsiher415','author415',2005,3), (416,'bookname416','publsiher416','author416',2003,25), (417,'bookname417','publsiher417','author417',2002,11), (418,'bookname418','publsiher418','author418',2005,29), (419,'bookname419','publsiher419','author419',2006,20), (420,'bookname420','publsiher420','author420',2006,15), (421,'bookname421','publsiher421','author421',2004,29), (422,'bookname422','publsiher422','author422',2007,12), (423,'bookname423','publsiher423','author423',2005,8), (424,'bookname424','publsiher424','author424',2002,25), (425,'bookname425','publsiher425','author425',2002,31), (426,'bookname426','publsiher426','author426',2010,1), (427,'bookname427','publsiher427','author427',2004,20), (428,'bookname428','publsiher428','author428',2006,11), (429,'bookname429','publsiher429','author429',2002,9), 
(430,'bookname430','publsiher430','author430',2009,27), (431,'bookname431','publsiher431','author431',2003,30), (432,'bookname432','publsiher432','author432',2004,1), (433,'bookname433','publsiher433','author433',2007,23), (434,'bookname434','publsiher434','author434',2010,1), (435,'bookname435','publsiher435','author435',2001,12), (436,'bookname436','publsiher436','author436',2008,2), (437,'bookname437','publsiher437','author437',2008,30), (438,'bookname438','publsiher438','author438',2009,19), (439,'bookname439','publsiher439','author439',2005,29), (440,'bookname440','publsiher440','author440',2003,15), (441,'bookname441','publsiher441','author441',2010,1), (442,'bookname442','publsiher442','author442',2008,13), (443,'bookname443','publsiher443','author443',2003,4), (444,'bookname444','publsiher444','author444',2006,28), (445,'bookname445','publsiher445','author445',2003,16), (446,'bookname446','publsiher446','author446',2008,15), (447,'bookname447','publsiher447','author447',2007,22), (448,'bookname448','publsiher448','author448',2010,31), (449,'bookname449','publsiher449','author449',2006,15), (450,'bookname450','publsiher450','author450',2010,28), (451,'bookname451','publsiher451','author451',2009,29), (452,'bookname452','publsiher452','author452',2007,14), (453,'bookname453','publsiher453','author453',2008,16), (454,'bookname454','publsiher454','author454',2002,19), (455,'bookname455','publsiher455','author455',2003,7), (456,'bookname456','publsiher456','author456',2009,13), (457,'bookname457','publsiher457','author457',2005,9), (458,'bookname458','publsiher458','author458',2006,9), (459,'bookname459','publsiher459','author459',2005,4), (460,'bookname460','publsiher460','author460',2007,21), (461,'bookname461','publsiher461','author461',2003,19), (462,'bookname462','publsiher462','author462',2008,7), (463,'bookname463','publsiher463','author463',2007,29), (464,'bookname464','publsiher464','author464',2005,6), 
(465,'bookname465','publsiher465','author465',2001,23), (466,'bookname466','publsiher466','author466',2005,17), (467,'bookname467','publsiher467','author467',2001,14), (468,'bookname468','publsiher468','author468',2004,21), (469,'bookname469','publsiher469','author469',2008,5), (470,'bookname470','publsiher470','author470',2003,26), (471,'bookname471','publsiher471','author471',2005,16), (472,'bookname472','publsiher472','author472',2001,25), (473,'bookname473','publsiher473','author473',2006,31), (474,'bookname474','publsiher474','author474',2009,25), (475,'bookname475','publsiher475','author475',2001,14), (476,'bookname476','publsiher476','author476',2008,1), (477,'bookname477','publsiher477','author477',2006,23), (478,'bookname478','publsiher478','author478',2005,24), (479,'bookname479','publsiher479','author479',2006,30), (480,'bookname480','publsiher480','author480',2007,13), (481,'bookname481','publsiher481','author481',2010,30), (482,'bookname482','publsiher482','author482',2002,6), (483,'bookname483','publsiher483','author483',2006,19), (484,'bookname484','publsiher484','author484',2009,22), (485,'bookname485','publsiher485','author485',2005,6), (486,'bookname486','publsiher486','author486',2004,13), (487,'bookname487','publsiher487','author487',2006,15), (488,'bookname488','publsiher488','author488',2003,17), (489,'bookname489','publsiher489','author489',2001,8), (490,'bookname490','publsiher490','author490',2005,21), (491,'bookname491','publsiher491','author491',2009,5), (492,'bookname492','publsiher492','author492',2007,21), (493,'bookname493','publsiher493','author493',2006,27), (494,'bookname494','publsiher494','author494',2002,4), (495,'bookname495','publsiher495','author495',2005,26), (496,'bookname496','publsiher496','author496',2004,3), (497,'bookname497','publsiher497','author497',2005,3), (498,'bookname498','publsiher498','author498',2007,2), (499,'bookname499','publsiher499','author499',2006,10), 
(500,'bookname500','publsiher500','author500',2005,6), (501,'bookname501','publsiher501','author501',2004,10), (502,'bookname502','publsiher502','author502',2009,11), (503,'bookname503','publsiher503','author503',2001,12), (504,'bookname504','publsiher504','author504',2006,12), (505,'bookname505','publsiher505','author505',2006,5), (506,'bookname506','publsiher506','author506',2010,26), (507,'bookname507','publsiher507','author507',2008,16), (508,'bookname508','publsiher508','author508',2003,13), (509,'bookname509','publsiher509','author509',2004,1), (510,'bookname510','publsiher510','author510',2010,15), (511,'bookname511','publsiher511','author511',2010,23), (512,'bookname512','publsiher512','author512',2003,13), (513,'bookname513','publsiher513','author513',2005,18), (514,'bookname514','publsiher514','author514',2007,9), (515,'bookname515','publsiher515','author515',2002,24), (516,'bookname516','publsiher516','author516',2010,8), (517,'bookname517','publsiher517','author517',2002,14), (518,'bookname518','publsiher518','author518',2003,11), (519,'bookname519','publsiher519','author519',2009,26), (520,'bookname520','publsiher520','author520',2008,12), (521,'bookname521','publsiher521','author521',2005,24), (522,'bookname522','publsiher522','author522',2001,4), (523,'bookname523','publsiher523','author523',2010,19), (524,'bookname524','publsiher524','author524',2004,28), (525,'bookname525','publsiher525','author525',2004,23), (526,'bookname526','publsiher526','author526',2004,14), (527,'bookname527','publsiher527','author527',2005,11), (528,'bookname528','publsiher528','author528',2010,24), (529,'bookname529','publsiher529','author529',2008,30), (530,'bookname530','publsiher530','author530',2009,6), (531,'bookname531','publsiher531','author531',2008,23), (532,'bookname532','publsiher532','author532',2008,26), (533,'bookname533','publsiher533','author533',2002,6), (534,'bookname534','publsiher534','author534',2009,13), 
(535,'bookname535','publsiher535','author535',2010,11), (536,'bookname536','publsiher536','author536',2001,23), (537,'bookname537','publsiher537','author537',2010,20), (538,'bookname538','publsiher538','author538',2006,12), (539,'bookname539','publsiher539','author539',2002,22), (540,'bookname540','publsiher540','author540',2008,13), (541,'bookname541','publsiher541','author541',2007,7), (542,'bookname542','publsiher542','author542',2001,4), (543,'bookname543','publsiher543','author543',2003,25), (544,'bookname544','publsiher544','author544',2002,5), (545,'bookname545','publsiher545','author545',2009,12), (546,'bookname546','publsiher546','author546',2004,3), (547,'bookname547','publsiher547','author547',2008,10), (548,'bookname548','publsiher548','author548',2010,3), (549,'bookname549','publsiher549','author549',2010,8), (550,'bookname550','publsiher550','author550',2001,21), (551,'bookname551','publsiher551','author551',2001,14), (552,'bookname552','publsiher552','author552',2002,11), (553,'bookname553','publsiher553','author553',2001,5), (554,'bookname554','publsiher554','author554',2008,9), (555,'bookname555','publsiher555','author555',2007,21), (556,'bookname556','publsiher556','author556',2004,6), (557,'bookname557','publsiher557','author557',2003,20), (558,'bookname558','publsiher558','author558',2002,5), (559,'bookname559','publsiher559','author559',2004,30), (560,'bookname560','publsiher560','author560',2001,15), (561,'bookname561','publsiher561','author561',2006,14), (562,'bookname562','publsiher562','author562',2004,18), (563,'bookname563','publsiher563','author563',2007,28), (564,'bookname564','publsiher564','author564',2010,22), (565,'bookname565','publsiher565','author565',2009,27), (566,'bookname566','publsiher566','author566',2008,15), (567,'bookname567','publsiher567','author567',2007,26), (568,'bookname568','publsiher568','author568',2001,8), (569,'bookname569','publsiher569','author569',2004,17), 
(570,'bookname570','publsiher570','author570',2008,22), (571,'bookname571','publsiher571','author571',2008,6), (572,'bookname572','publsiher572','author572',2008,6), (573,'bookname573','publsiher573','author573',2006,6), (574,'bookname574','publsiher574','author574',2008,21), (575,'bookname575','publsiher575','author575',2007,31), (576,'bookname576','publsiher576','author576',2009,17), (577,'bookname577','publsiher577','author577',2002,21), (578,'bookname578','publsiher578','author578',2005,19), (579,'bookname579','publsiher579','author579',2006,13), (580,'bookname580','publsiher580','author580',2003,25), (581,'bookname581','publsiher581','author581',2008,20), (582,'bookname582','publsiher582','author582',2010,11), (583,'bookname583','publsiher583','author583',2007,7), (584,'bookname584','publsiher584','author584',2002,18), (585,'bookname585','publsiher585','author585',2010,2), (586,'bookname586','publsiher586','author586',2007,28), (587,'bookname587','publsiher587','author587',2003,20), (588,'bookname588','publsiher588','author588',2006,28), (589,'bookname589','publsiher589','author589',2008,3), (590,'bookname590','publsiher590','author590',2006,9), (591,'bookname591','publsiher591','author591',2007,31), (592,'bookname592','publsiher592','author592',2004,8), (593,'bookname593','publsiher593','author593',2001,23), (594,'bookname594','publsiher594','author594',2002,26), (595,'bookname595','publsiher595','author595',2006,15), (596,'bookname596','publsiher596','author596',2004,17), (597,'bookname597','publsiher597','author597',2005,2), (598,'bookname598','publsiher598','author598',2001,24), (599,'bookname599','publsiher599','author599',2002,12), (600,'bookname600','publsiher600','author600',2005,15), (601,'bookname601','publsiher601','author601',2002,26), (602,'bookname602','publsiher602','author602',2009,3), (603,'bookname603','publsiher603','author603',2006,12), (604,'bookname604','publsiher604','author604',2005,20), 
(605,'bookname605','publsiher605','author605',2005,4), (606,'bookname606','publsiher606','author606',2009,30), (607,'bookname607','publsiher607','author607',2010,26), (608,'bookname608','publsiher608','author608',2005,3), (609,'bookname609','publsiher609','author609',2008,15), (610,'bookname610','publsiher610','author610',2005,11), (611,'bookname611','publsiher611','author611',2010,14), (612,'bookname612','publsiher612','author612',2004,14), (613,'bookname613','publsiher613','author613',2002,26), (614,'bookname614','publsiher614','author614',2010,10), (615,'bookname615','publsiher615','author615',2001,14), (616,'bookname616','publsiher616','author616',2003,10), (617,'bookname617','publsiher617','author617',2007,23), (618,'bookname618','publsiher618','author618',2005,2), (619,'bookname619','publsiher619','author619',2009,6), (620,'bookname620','publsiher620','author620',2005,11), (621,'bookname621','publsiher621','author621',2003,8), (622,'bookname622','publsiher622','author622',2003,26), (623,'bookname623','publsiher623','author623',2006,24), (624,'bookname624','publsiher624','author624',2006,30), (625,'bookname625','publsiher625','author625',2002,18), (626,'bookname626','publsiher626','author626',2004,8), (627,'bookname627','publsiher627','author627',2003,24), (628,'bookname628','publsiher628','author628',2001,10), (629,'bookname629','publsiher629','author629',2004,15), (630,'bookname630','publsiher630','author630',2002,2), (631,'bookname631','publsiher631','author631',2004,10), (632,'bookname632','publsiher632','author632',2010,24), (633,'bookname633','publsiher633','author633',2003,10), (634,'bookname634','publsiher634','author634',2009,14), (635,'bookname635','publsiher635','author635',2003,25), (636,'bookname636','publsiher636','author636',2007,5), (637,'bookname637','publsiher637','author637',2003,8), (638,'bookname638','publsiher638','author638',2007,26), (639,'bookname639','publsiher639','author639',2001,5), 
(640,'bookname640','publsiher640','author640',2003,10), (641,'bookname641','publsiher641','author641',2006,19), (642,'bookname642','publsiher642','author642',2010,22), (643,'bookname643','publsiher643','author643',2001,29), (644,'bookname644','publsiher644','author644',2010,30), (645,'bookname645','publsiher645','author645',2006,21), (646,'bookname646','publsiher646','author646',2009,23), (647,'bookname647','publsiher647','author647',2003,8), (648,'bookname648','publsiher648','author648',2008,8), (649,'bookname649','publsiher649','author649',2003,4), (650,'bookname650','publsiher650','author650',2005,22), (651,'bookname651','publsiher651','author651',2010,28), (652,'bookname652','publsiher652','author652',2008,4), (653,'bookname653','publsiher653','author653',2007,4), (654,'bookname654','publsiher654','author654',2010,11), (655,'bookname655','publsiher655','author655',2002,28), (656,'bookname656','publsiher656','author656',2006,24), (657,'bookname657','publsiher657','author657',2005,5), (658,'bookname658','publsiher658','author658',2002,29), (659,'bookname659','publsiher659','author659',2005,20), (660,'bookname660','publsiher660','author660',2006,23), (661,'bookname661','publsiher661','author661',2006,19), (662,'bookname662','publsiher662','author662',2004,28), (663,'bookname663','publsiher663','author663',2001,14), (664,'bookname664','publsiher664','author664',2010,15), (665,'bookname665','publsiher665','author665',2004,14), (666,'bookname666','publsiher666','author666',2002,16), (667,'bookname667','publsiher667','author667',2007,4), (668,'bookname668','publsiher668','author668',2007,3), (669,'bookname669','publsiher669','author669',2002,26), (670,'bookname670','publsiher670','author670',2003,30), (671,'bookname671','publsiher671','author671',2006,9), (672,'bookname672','publsiher672','author672',2001,22), (673,'bookname673','publsiher673','author673',2008,31), (674,'bookname674','publsiher674','author674',2001,23), 
(675,'bookname675','publsiher675','author675',2001,13), (676,'bookname676','publsiher676','author676',2002,15), (677,'bookname677','publsiher677','author677',2003,24), (678,'bookname678','publsiher678','author678',2001,24), (679,'bookname679','publsiher679','author679',2004,19), (680,'bookname680','publsiher680','author680',2009,31), (681,'bookname681','publsiher681','author681',2003,29), (682,'bookname682','publsiher682','author682',2009,22), (683,'bookname683','publsiher683','author683',2010,8), (684,'bookname684','publsiher684','author684',2004,16), (685,'bookname685','publsiher685','author685',2001,21), (686,'bookname686','publsiher686','author686',2004,1), (687,'bookname687','publsiher687','author687',2004,10), (688,'bookname688','publsiher688','author688',2002,2), (689,'bookname689','publsiher689','author689',2001,21), (690,'bookname690','publsiher690','author690',2009,5), (691,'bookname691','publsiher691','author691',2002,11), (692,'bookname692','publsiher692','author692',2008,20), (693,'bookname693','publsiher693','author693',2006,25), (694,'bookname694','publsiher694','author694',2008,4), (695,'bookname695','publsiher695','author695',2001,30), (696,'bookname696','publsiher696','author696',2009,31), (697,'bookname697','publsiher697','author697',2007,15), (698,'bookname698','publsiher698','author698',2009,27), (699,'bookname699','publsiher699','author699',2007,9), (700,'bookname700','publsiher700','author700',2006,1), (701,'bookname701','publsiher701','author701',2009,18), (702,'bookname702','publsiher702','author702',2010,24), (703,'bookname703','publsiher703','author703',2004,9), (704,'bookname704','publsiher704','author704',2008,13), (705,'bookname705','publsiher705','author705',2001,26), (706,'bookname706','publsiher706','author706',2009,2), (707,'bookname707','publsiher707','author707',2005,7), (708,'bookname708','publsiher708','author708',2005,15), (709,'bookname709','publsiher709','author709',2006,10), 
(710,'bookname710','publsiher710','author710',2001,8), (711,'bookname711','publsiher711','author711',2005,6), (712,'bookname712','publsiher712','author712',2005,3), (713,'bookname713','publsiher713','author713',2001,20), (714,'bookname714','publsiher714','author714',2006,5), (715,'bookname715','publsiher715','author715',2006,24), (716,'bookname716','publsiher716','author716',2004,23), (717,'bookname717','publsiher717','author717',2003,17), (718,'bookname718','publsiher718','author718',2002,21), (719,'bookname719','publsiher719','author719',2001,8), (720,'bookname720','publsiher720','author720',2004,5), (721,'bookname721','publsiher721','author721',2005,2), (722,'bookname722','publsiher722','author722',2004,17), (723,'bookname723','publsiher723','author723',2006,6), (724,'bookname724','publsiher724','author724',2004,14), (725,'bookname725','publsiher725','author725',2008,27), (726,'bookname726','publsiher726','author726',2009,21), (727,'bookname727','publsiher727','author727',2004,13), (728,'bookname728','publsiher728','author728',2003,7), (729,'bookname729','publsiher729','author729',2007,27), (730,'bookname730','publsiher730','author730',2009,29), (731,'bookname731','publsiher731','author731',2009,26), (732,'bookname732','publsiher732','author732',2005,29), (733,'bookname733','publsiher733','author733',2003,10), (734,'bookname734','publsiher734','author734',2006,18), (735,'bookname735','publsiher735','author735',2001,31), (736,'bookname736','publsiher736','author736',2009,30), (737,'bookname737','publsiher737','author737',2002,1), (738,'bookname738','publsiher738','author738',2001,4), (739,'bookname739','publsiher739','author739',2006,20), (740,'bookname740','publsiher740','author740',2006,31), (741,'bookname741','publsiher741','author741',2005,9), (742,'bookname742','publsiher742','author742',2010,22), (743,'bookname743','publsiher743','author743',2004,25), (744,'bookname744','publsiher744','author744',2009,5), 
(745,'bookname745','publsiher745','author745',2002,6), (746,'bookname746','publsiher746','author746',2007,28), (747,'bookname747','publsiher747','author747',2006,28), (748,'bookname748','publsiher748','author748',2003,16), (749,'bookname749','publsiher749','author749',2005,22), (750,'bookname750','publsiher750','author750',2010,20), (751,'bookname751','publsiher751','author751',2006,6), (752,'bookname752','publsiher752','author752',2006,24), (753,'bookname753','publsiher753','author753',2002,20), (754,'bookname754','publsiher754','author754',2005,10), (755,'bookname755','publsiher755','author755',2007,10), (756,'bookname756','publsiher756','author756',2001,21), (757,'bookname757','publsiher757','author757',2001,16), (758,'bookname758','publsiher758','author758',2003,2), (759,'bookname759','publsiher759','author759',2010,5), (760,'bookname760','publsiher760','author760',2001,25), (761,'bookname761','publsiher761','author761',2006,18), (762,'bookname762','publsiher762','author762',2001,12), (763,'bookname763','publsiher763','author763',2006,3), (764,'bookname764','publsiher764','author764',2004,6), (765,'bookname765','publsiher765','author765',2007,5), (766,'bookname766','publsiher766','author766',2005,6), (767,'bookname767','publsiher767','author767',2002,7), (768,'bookname768','publsiher768','author768',2004,21), (769,'bookname769','publsiher769','author769',2001,9), (770,'bookname770','publsiher770','author770',2008,15), (771,'bookname771','publsiher771','author771',2009,1), (772,'bookname772','publsiher772','author772',2005,8), (773,'bookname773','publsiher773','author773',2004,16), (774,'bookname774','publsiher774','author774',2007,20), (775,'bookname775','publsiher775','author775',2008,6), (776,'bookname776','publsiher776','author776',2003,13), (777,'bookname777','publsiher777','author777',2002,1), (778,'bookname778','publsiher778','author778',2008,16), (779,'bookname779','publsiher779','author779',2008,19), 
(780,'bookname780','publsiher780','author780',2005,7), (781,'bookname781','publsiher781','author781',2004,1), (782,'bookname782','publsiher782','author782',2010,18), (783,'bookname783','publsiher783','author783',2005,30), (784,'bookname784','publsiher784','author784',2001,19), (785,'bookname785','publsiher785','author785',2007,11), (786,'bookname786','publsiher786','author786',2009,26), (787,'bookname787','publsiher787','author787',2010,31), (788,'bookname788','publsiher788','author788',2003,13), (789,'bookname789','publsiher789','author789',2003,8), (790,'bookname790','publsiher790','author790',2008,28), (791,'bookname791','publsiher791','author791',2003,3), (792,'bookname792','publsiher792','author792',2004,13), (793,'bookname793','publsiher793','author793',2001,20), (794,'bookname794','publsiher794','author794',2001,31), (795,'bookname795','publsiher795','author795',2009,27), (796,'bookname796','publsiher796','author796',2005,15), (797,'bookname797','publsiher797','author797',2008,24), (798,'bookname798','publsiher798','author798',2008,4), (799,'bookname799','publsiher799','author799',2004,10), (800,'bookname800','publsiher800','author800',2010,17), (801,'bookname801','publsiher801','author801',2002,18), (802,'bookname802','publsiher802','author802',2003,17), (803,'bookname803','publsiher803','author803',2005,10), (804,'bookname804','publsiher804','author804',2001,31), (805,'bookname805','publsiher805','author805',2006,30), (806,'bookname806','publsiher806','author806',2002,30), (807,'bookname807','publsiher807','author807',2003,7), (808,'bookname808','publsiher808','author808',2002,15), (809,'bookname809','publsiher809','author809',2004,23), (810,'bookname810','publsiher810','author810',2010,29), (811,'bookname811','publsiher811','author811',2005,1), (812,'bookname812','publsiher812','author812',2003,7), (813,'bookname813','publsiher813','author813',2005,25), (814,'bookname814','publsiher814','author814',2003,4), 
(815,'bookname815','publsiher815','author815',2008,18), (816,'bookname816','publsiher816','author816',2006,23), (817,'bookname817','publsiher817','author817',2005,18), (818,'bookname818','publsiher818','author818',2009,26), (819,'bookname819','publsiher819','author819',2009,26), (820,'bookname820','publsiher820','author820',2004,13), (821,'bookname821','publsiher821','author821',2008,3), (822,'bookname822','publsiher822','author822',2006,24), (823,'bookname823','publsiher823','author823',2003,20), (824,'bookname824','publsiher824','author824',2010,7), (825,'bookname825','publsiher825','author825',2010,8), (826,'bookname826','publsiher826','author826',2004,24), (827,'bookname827','publsiher827','author827',2005,11), (828,'bookname828','publsiher828','author828',2009,19), (829,'bookname829','publsiher829','author829',2006,14), (830,'bookname830','publsiher830','author830',2009,7), (831,'bookname831','publsiher831','author831',2005,1), (832,'bookname832','publsiher832','author832',2010,16), (833,'bookname833','publsiher833','author833',2008,5), (834,'bookname834','publsiher834','author834',2008,18), (835,'bookname835','publsiher835','author835',2002,27), (836,'bookname836','publsiher836','author836',2004,28), (837,'bookname837','publsiher837','author837',2007,8), (838,'bookname838','publsiher838','author838',2004,6), (839,'bookname839','publsiher839','author839',2002,16), (840,'bookname840','publsiher840','author840',2003,21), (841,'bookname841','publsiher841','author841',2009,12), (842,'bookname842','publsiher842','author842',2002,14), (843,'bookname843','publsiher843','author843',2001,21), (844,'bookname844','publsiher844','author844',2005,28), (845,'bookname845','publsiher845','author845',2005,31), (846,'bookname846','publsiher846','author846',2009,12), (847,'bookname847','publsiher847','author847',2001,7), (848,'bookname848','publsiher848','author848',2008,13), (849,'bookname849','publsiher849','author849',2005,14), 
(850,'bookname850','publsiher850','author850',2003,13), (851,'bookname851','publsiher851','author851',2008,2), (852,'bookname852','publsiher852','author852',2007,31), (853,'bookname853','publsiher853','author853',2002,15), (854,'bookname854','publsiher854','author854',2010,16), (855,'bookname855','publsiher855','author855',2001,2), (856,'bookname856','publsiher856','author856',2005,3), (857,'bookname857','publsiher857','author857',2001,24), (858,'bookname858','publsiher858','author858',2006,2), (859,'bookname859','publsiher859','author859',2009,27), (860,'bookname860','publsiher860','author860',2010,24), (861,'bookname861','publsiher861','author861',2007,18), (862,'bookname862','publsiher862','author862',2003,7), (863,'bookname863','publsiher863','author863',2003,4), (864,'bookname864','publsiher864','author864',2001,26), (865,'bookname865','publsiher865','author865',2003,14), (866,'bookname866','publsiher866','author866',2004,8), (867,'bookname867','publsiher867','author867',2004,9), (868,'bookname868','publsiher868','author868',2003,21), (869,'bookname869','publsiher869','author869',2005,7), (870,'bookname870','publsiher870','author870',2004,20), (871,'bookname871','publsiher871','author871',2009,26), (872,'bookname872','publsiher872','author872',2001,29), (873,'bookname873','publsiher873','author873',2010,16), (874,'bookname874','publsiher874','author874',2002,17), (875,'bookname875','publsiher875','author875',2003,26), (876,'bookname876','publsiher876','author876',2003,8), (877,'bookname877','publsiher877','author877',2004,31), (878,'bookname878','publsiher878','author878',2002,7), (879,'bookname879','publsiher879','author879',2008,4), (880,'bookname880','publsiher880','author880',2003,11), (881,'bookname881','publsiher881','author881',2002,1), (882,'bookname882','publsiher882','author882',2009,4), (883,'bookname883','publsiher883','author883',2005,13), (884,'bookname884','publsiher884','author884',2004,1), (885,'bookname885','publsiher885','author885',2009,6), 
(886,'bookname886','publsiher886','author886',2001,16), (887,'bookname887','publsiher887','author887',2007,22), (888,'bookname888','publsiher888','author888',2010,13), (889,'bookname889','publsiher889','author889',2002,13), (890,'bookname890','publsiher890','author890',2010,12), (891,'bookname891','publsiher891','author891',2007,29), (892,'bookname892','publsiher892','author892',2004,24), (893,'bookname893','publsiher893','author893',2003,19), (894,'bookname894','publsiher894','author894',2008,17), (895,'bookname895','publsiher895','author895',2008,18), (896,'bookname896','publsiher896','author896',2008,10), (897,'bookname897','publsiher897','author897',2007,19), (898,'bookname898','publsiher898','author898',2001,10), (899,'bookname899','publsiher899','author899',2003,18), (900,'bookname900','publsiher900','author900',2010,26), (901,'bookname901','publsiher901','author901',2005,18), (902,'bookname902','publsiher902','author902',2001,7), (903,'bookname903','publsiher903','author903',2010,10), (904,'bookname904','publsiher904','author904',2001,20), (905,'bookname905','publsiher905','author905',2006,4), (906,'bookname906','publsiher906','author906',2002,23), (907,'bookname907','publsiher907','author907',2008,9), (908,'bookname908','publsiher908','author908',2010,29), (909,'bookname909','publsiher909','author909',2003,24), (910,'bookname910','publsiher910','author910',2007,22), (911,'bookname911','publsiher911','author911',2002,19), (912,'bookname912','publsiher912','author912',2001,13), (913,'bookname913','publsiher913','author913',2004,5), (914,'bookname914','publsiher914','author914',2007,25), (915,'bookname915','publsiher915','author915',2007,4), (916,'bookname916','publsiher916','author916',2009,17), (917,'bookname917','publsiher917','author917',2003,3), (918,'bookname918','publsiher918','author918',2003,6), (919,'bookname919','publsiher919','author919',2003,9), (920,'bookname920','publsiher920','author920',2006,22), 
(921,'bookname921','publsiher921','author921',2006,28), (922,'bookname922','publsiher922','author922',2001,16), (923,'bookname923','publsiher923','author923',2008,24), (924,'bookname924','publsiher924','author924',2002,11), (925,'bookname925','publsiher925','author925',2005,16), (926,'bookname926','publsiher926','author926',2002,20), (927,'bookname927','publsiher927','author927',2006,6), (928,'bookname928','publsiher928','author928',2010,13), (929,'bookname929','publsiher929','author929',2007,17), (930,'bookname930','publsiher930','author930',2010,21), (931,'bookname931','publsiher931','author931',2007,10), (932,'bookname932','publsiher932','author932',2008,31), (933,'bookname933','publsiher933','author933',2006,17), (934,'bookname934','publsiher934','author934',2007,7), (935,'bookname935','publsiher935','author935',2010,18), (936,'bookname936','publsiher936','author936',2006,22), (937,'bookname937','publsiher937','author937',2004,18), (938,'bookname938','publsiher938','author938',2010,21), (939,'bookname939','publsiher939','author939',2003,12), (940,'bookname940','publsiher940','author940',2010,15), (941,'bookname941','publsiher941','author941',2005,12), (942,'bookname942','publsiher942','author942',2010,2), (943,'bookname943','publsiher943','author943',2004,3), (944,'bookname944','publsiher944','author944',2010,6), (945,'bookname945','publsiher945','author945',2002,31), (946,'bookname946','publsiher946','author946',2002,12), (947,'bookname947','publsiher947','author947',2001,6), (948,'bookname948','publsiher948','author948',2004,2), (949,'bookname949','publsiher949','author949',2004,6), (950,'bookname950','publsiher950','author950',2001,14), (951,'bookname951','publsiher951','author951',2006,26), (952,'bookname952','publsiher952','author952',2005,13), (953,'bookname953','publsiher953','author953',2010,1), (954,'bookname954','publsiher954','author954',2008,22), (955,'bookname955','publsiher955','author955',2002,27), 
(956,'bookname956','publsiher956','author956',2008,29), (957,'bookname957','publsiher957','author957',2005,3), (958,'bookname958','publsiher958','author958',2010,25), (959,'bookname959','publsiher959','author959',2004,30), (960,'bookname960','publsiher960','author960',2008,4), (961,'bookname961','publsiher961','author961',2003,13), (962,'bookname962','publsiher962','author962',2006,22), (963,'bookname963','publsiher963','author963',2001,23), (964,'bookname964','publsiher964','author964',2004,30), (965,'bookname965','publsiher965','author965',2006,25), (966,'bookname966','publsiher966','author966',2001,17), (967,'bookname967','publsiher967','author967',2001,2), (968,'bookname968','publsiher968','author968',2005,14), (969,'bookname969','publsiher969','author969',2004,28), (970,'bookname970','publsiher970','author970',2004,2), (971,'bookname971','publsiher971','author971',2002,22), (972,'bookname972','publsiher972','author972',2010,26), (973,'bookname973','publsiher973','author973',2005,15), (974,'bookname974','publsiher974','author974',2008,5), (975,'bookname975','publsiher975','author975',2007,9), (976,'bookname976','publsiher976','author976',2008,3), (977,'bookname977','publsiher977','author977',2006,7), (978,'bookname978','publsiher978','author978',2003,20), (979,'bookname979','publsiher979','author979',2007,18), (980,'bookname980','publsiher980','author980',2003,1), (981,'bookname981','publsiher981','author981',2001,28), (982,'bookname982','publsiher982','author982',2007,9), (983,'bookname983','publsiher983','author983',2003,22), (984,'bookname984','publsiher984','author984',2002,7), (985,'bookname985','publsiher985','author985',2004,2), (986,'bookname986','publsiher986','author986',2006,16), (987,'bookname987','publsiher987','author987',2004,3), (988,'bookname988','publsiher988','author988',2005,18), (989,'bookname989','publsiher989','author989',2007,21), (990,'bookname990','publsiher990','author990',2005,19), 
(991,'bookname991','publsiher991','author991',2007,30), (992,'bookname992','publsiher992','author992',2005,10), (993,'bookname993','publsiher993','author993',2001,3), (994,'bookname994','publsiher994','author994',2001,21), (995,'bookname995','publsiher995','author995',2009,1), (996,'bookname996','publsiher996','author996',2010,2), (997,'bookname997','publsiher997','author997',2006,20), (998,'bookname998','publsiher998','author998',2007,5), (999,'bookname999','publsiher999','author999',2004,5), (0,'bookname0','publsiher0','author0',2001,1); -------------------------------------------------------------------------------- /glue-etl-pipeline-dataquality-check.txt: -------------------------------------------------------------------------------- 1 | 2 | =========== Reusable Workflow Configuration =================== 3 | 4 | { 5 | "Comment": "Data Quality Check Workflow", 6 | "StartAt": "StartProfileJob", 7 | "States": { 8 | "StartProfileJob": { 9 | "Type": "Task", 10 | "Resource": "arn:aws:states:::databrew:startJobRun.sync", 11 | "Parameters": { 12 | "Name.$": "$.profilejobname" 13 | }, 14 | "Next": "CheckDQOutput" 15 | }, 16 | "CheckDQOutput": { 17 | "Type": "Task", 18 | "Resource": "arn:aws:states:::lambda:invoke", 19 | "OutputPath": "$.Payload", 20 | "Parameters": { 21 | "Payload.$": "$", 22 | "FunctionName": "" 23 | }, 24 | "Retry": [ 25 | { 26 | "ErrorEquals": [ 27 | "Lambda.ServiceException", 28 | "Lambda.AWSLambdaException", 29 | "Lambda.SdkClientException" 30 | ], 31 | "IntervalSeconds": 2, 32 | "MaxAttempts": 6, 33 | "BackoffRate": 2 34 | } 35 | ], 36 | "Next": "Choice" 37 | }, 38 | "Choice": { 39 | "Type": "Choice", 40 | "Choices": [ 41 | { 42 | "Not": { 43 | "Variable": "$.dqstatus", 44 | "StringEquals": "SUCCEEDED" 45 | }, 46 | "Next": "NotifyDQFail" 47 | } 48 | ], 49 | "Default": "Pass" 50 | }, 51 | "NotifyDQFail": { 52 | "Type": "Task", 53 | "Resource": "arn:aws:states:::sns:publish", 54 | "Parameters": { 55 | "Message.$": "$", 56 | "TopicArn": "" 57 | }, 
58 | "Next": "Fail" 59 | }, 60 | "Fail": { 61 | "Type": "Fail", 62 | "Error": "Data Quality Check Failed" 63 | }, 64 | "Pass": { 65 | "Type": "Pass", 66 | "End": true 67 | } 68 | } 69 | } 70 | 71 | =========== Lambda Code to Check DQ Rules Result =================== 72 | 73 | import json 74 | import boto3 75 | 76 | def lambda_handler(event, context): 77 | # TODO implement 78 | bucketname = "" 79 | filename = "" 80 | jobname = event["JobName"] 81 | for o in event["Outputs"]: 82 | bucketname = o["Location"]["Bucket"] 83 | if "dq-validation" in o["Location"]["Key"]: 84 | filename = o["Location"]["Key"] 85 | 86 | s3 = boto3.resource('s3') 87 | 88 | content_object = s3.Object(bucketname, filename) 89 | file_content = content_object.get()['Body'].read().decode('utf-8') 90 | profilejson = json.loads(file_content) 91 | 92 | ruleset = "" 93 | status = "" 94 | 95 | for rs in profilejson["rulesetResults"]: 96 | ruleset = rs["name"] 97 | status = rs["status"] 98 | 99 | return { 100 | 'statusCode': 200, 101 | 'dqstatus': status, 102 | 'ruleset': ruleset, 103 | 'jobname' : jobname 104 | } 105 | 106 | =========== ETL Pipeline Workflow Configuration =================== 107 | 108 | { 109 | "Comment": "ETL Pipeline", 110 | "StartAt": "Glue StartJobRun", 111 | "States": { 112 | "Glue StartJobRun": { 113 | "Type": "Task", 114 | "Resource": "arn:aws:states:::glue:startJobRun.sync", 115 | "Parameters": { 116 | "JobName": "", 125 | "Input": { 126 | "profilejobname": "", 127 | "AWS_STEP_FUNCTIONS_STARTED_BY_EXECUTION_ID.$": "$$.Execution.Id" 128 | } 129 | }, 130 | "End": true 131 | } 132 | } 133 | } -------------------------------------------------------------------------------- /glue-external-libraries.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-dojo/analytics/7bf14ff1faaba9c4e3f9e9706a8765c272b01208/glue-external-libraries.zip -------------------------------------------------------------------------------- 
/glue_jdbc_parallel_read.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "cab52480", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import sys\n", 11 | "from awsglue.transforms import *\n", 12 | "from awsglue.utils import getResolvedOptions\n", 13 | "from pyspark.context import SparkContext\n", 14 | "from awsglue.context import GlueContext\n", 15 | "from awsglue.job import Job\n", 16 | "import time\n", 17 | "\n", 18 | "glueContext = GlueContext(SparkContext.getOrCreate())" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "e6dbff06", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "fooddf = glueContext.create_dynamic_frame.from_catalog(\n", 29 | " database=\"dojodb\",\n", 30 | " table_name=\"postgres_public_fooddemand\",\n", 31 | ")" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "id": "4553f0b0", 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "fooddf1 = glueContext.create_dynamic_frame.from_catalog(\n", 42 | " database=\"dojodb\",\n", 43 | " table_name=\"postgres_public_fooddemand\",\n", 44 | " additional_options = {\n", 45 | " 'hashfield': 'week',\n", 46 | " 'hashpartitions': '5'\n", 47 | " }\n", 48 | ")" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "id": "2befc711", 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "fooddf.count()" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "id": "e56c2a1d", 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "fooddf1.count()" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "id": "7e0f0072", 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "import boto3\n", 79 | "import json\n", 80 | "\n", 81 | "secret_name = \"\"\n", 82 | 
"region_name = \"\"\n", 83 | "session = boto3.session.Session()\n", 84 | "client = session.client(service_name='secretsmanager',region_name=region_name)\n", 85 | "\n", 86 | "response = client.get_secret_value(SecretId=secret_name)\n", 87 | "\n", 88 | "secrets = json.loads(response[\"SecretString\"])\n", 89 | "\n", 90 | "uname = secrets[\"username\"]\n", 91 | "pwd = secrets[\"password\"]\n", 92 | "host = secrets[\"host\"]\n", 93 | "\n", 94 | "print(host)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "id": "b46a7981", 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "glueContext.write_dynamic_frame.from_options(\n", 105 | " frame=fooddf,\n", 106 | " connection_type=\"redshift\",\n", 107 | " connection_options = {\n", 108 | " \"user\" : uname,\n", 109 | " \"password\" : pwd,\n", 110 | " \"url\" : \"jdbc:redshift://\" + host + \":5439/dev\",\n", 111 | " \"dbtable\" : \"public.fooddemand\",\n", 112 | " \"redshiftTmpDir\": \"s3://dojo-dataset/temp/\",\n", 113 | " \"bulkSize\" : \"2\"} \n", 114 | ")" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "id": "6c1d14d6", 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [] 124 | } 125 | ], 126 | "metadata": { 127 | "kernelspec": { 128 | "display_name": "Sparkmagic (PySpark)", 129 | "language": "python", 130 | "name": "pysparkkernel" 131 | }, 132 | "language_info": { 133 | "codemirror_mode": { 134 | "name": "python", 135 | "version": 3 136 | }, 137 | "file_extension": ".py", 138 | "mimetype": "text/x-python", 139 | "name": "pyspark", 140 | "pygments_lexer": "python3" 141 | } 142 | }, 143 | "nbformat": 4, 144 | "nbformat_minor": 5 145 | } 146 | -------------------------------------------------------------------------------- /gluejobtransactioncode.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from awsglue.utils import getResolvedOptions 3 | from pyspark.context import 
SparkContext 4 | from awsglue.context import GlueContext 5 | from awsglue.job import Job 6 | 7 | args = getResolvedOptions(sys.argv, ["JOB_NAME"]) 8 | sc = SparkContext.getOrCreate() 9 | glueContext = GlueContext(sc) 10 | spark = glueContext.spark_session 11 | job = Job(glueContext) 12 | job.init(args["JOB_NAME"], args) 13 | 14 | db = "dojodb" 15 | tbl = "src_postgres_public_customers" 16 | tx_id = glueContext.start_transaction(False) 17 | 18 | datasource0 = glueContext.create_dynamic_frame.from_catalog( 19 | database = db, table_name = tbl, 20 | transformation_ctx = "datasource0") 21 | 22 | dest_path = "s3://dojo-dataset/customers/" 23 | 24 | sink = glueContext.getSink( 25 | connection_type="s3", path=dest_path, 26 | enableUpdateCatalog=True, 27 | transactionId=tx_id, 28 | additional_options={ 29 | "callDeleteObjectsOnCancel":"true" 30 | } 31 | ) 32 | sink.setFormat("glueparquet") 33 | sink.setCatalogInfo( 34 | catalogDatabase=db, catalogTableName="customers_governed" 35 | ) 36 | 37 | try: 38 | sink.writeFrame(datasource0) 39 | glueContext.commit_transaction(tx_id) 40 | except Exception: 41 | glueContext.cancel_transaction(tx_id) 42 | raise 43 | job.commit() -------------------------------------------------------------------------------- /jobcode.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from awsglue.transforms import * 3 | from awsglue.utils import getResolvedOptions 4 | from pyspark.context import SparkContext 5 | from awsglue.context import GlueContext 6 | from awsglue.job import Job 7 | 8 | args = getResolvedOptions(sys.argv, ["JOB_NAME"]) 9 | sc = SparkContext() 10 | glueContext = GlueContext(sc) 11 | spark = glueContext.spark_session 12 | job = Job(glueContext) 13 | job.init(args["JOB_NAME"], args) 14 | 15 | sensordf = glueContext.create_dynamic_frame.from_options( 16 | format_options={}, 17 | connection_type="s3", 18 | format="parquet", 19 | connection_options={"paths": 
["s3://dojo-mydata/sensordata/"], "recurse": True, 'groupFiles': 'inPartition', 'groupSize': '1048576'} 20 | ) 21 | 22 | sensordf = SelectFields.apply( 23 | frame=sensordf, 24 | paths=["recorddate", "failure"] 25 | ) 26 | 27 | glueContext.write_dynamic_frame.from_options( 28 | frame=sensordf, 29 | connection_type="s3", 30 | format="glueparquet", 31 | connection_options={"path": "s3://dojo-mydata/output/", "partitionKeys": []}, 32 | format_options={"compression": "snappy"} 33 | ) 34 | 35 | job.commit() 36 | -------------------------------------------------------------------------------- /lakeformationtransactioncode.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # In[1]: 5 | 6 | 7 | import boto3 8 | client = boto3.client('lakeformation') 9 | 10 | objectname1 = 'run-AmazonS3_node1640170420462-2-part-block-0-r-00006-snappy.parquet' 11 | objectname2 = 'run-AmazonS3_node1640181213980-1-part-block-0-r-00005-snappy.parquet' 12 | bucketname = 'dojo-dataset' 13 | sourcefolder1 = 'source/employees/' 14 | sourcefolder2 = 'source/customers/' 15 | destinationfolder1 = 'employees/' 16 | destinationfolder2 = 'customers/' 17 | accountnumber = '999999999999' 18 | database = "dojodb" 19 | table1 = 'employees_governed' 20 | table2 = 'customers_governed' 21 | etag1 = 'eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee' 22 | etag2 = 'eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee' 23 | size1 = 614 24 | size2 = 519 25 | 26 | 27 | # In[2]: 28 | 29 | 30 | response = client.start_transaction(TransactionType='READ_AND_WRITE') 31 | response 32 | 33 | 34 | # In[3]: 35 | 36 | 37 | trx_id = response['TransactionId'] 38 | trx_id 39 | 40 | 41 | # In[4]: 42 | 43 | 44 | s3 = boto3.resource('s3') 45 | 46 | copy_source = { 47 | 'Bucket': bucketname, 48 | 'Key': sourcefolder1 + objectname1} 49 | 50 | s3.meta.client.copy(copy_source,bucketname, destinationfolder1 + objectname1) 51 | 52 | copy_source = { 53 | 'Bucket': bucketname, 54 | 'Key': 
sourcefolder2 + objectname2} 55 | 56 | s3.meta.client.copy(copy_source,bucketname, destinationfolder2 + objectname2) 57 | 58 | 59 | # In[5]: 60 | 61 | 62 | response = client.delete_objects_on_cancel( 63 | CatalogId=accountnumber, 64 | DatabaseName=database, 65 | TableName=table1, 66 | TransactionId=trx_id, 67 | Objects=[ 68 | { 69 | 'Uri': 's3://' + bucketname + '/' + destinationfolder1 + objectname1, 70 | 'ETag': etag1 71 | } 72 | ] 73 | ) 74 | 75 | response = client.delete_objects_on_cancel( 76 | CatalogId=accountnumber, 77 | DatabaseName=database, 78 | TableName=table2, 79 | TransactionId=trx_id, 80 | Objects=[ 81 | { 82 | 'Uri': 's3://' + bucketname + '/' + destinationfolder2 + objectname2, 83 | 'ETag': etag2 84 | } 85 | ] 86 | ) 87 | 88 | 89 | # In[6]: 90 | 91 | 92 | response = client.update_table_objects( 93 | CatalogId=accountnumber, 94 | DatabaseName=database, 95 | TableName=table1, 96 | TransactionId=trx_id, 97 | WriteOperations=[ 98 | { 99 | 'AddObject': { 100 | 'Uri': 's3://' + bucketname + '/' + destinationfolder1 + objectname1, 101 | 'ETag': etag1, 102 | 'Size': size1 103 | } 104 | } 105 | ] 106 | ) 107 | 108 | response = client.update_table_objects( 109 | CatalogId=accountnumber, 110 | DatabaseName=database, 111 | TableName=table2, 112 | TransactionId=trx_id, 113 | WriteOperations=[ 114 | { 115 | 'AddObject': { 116 | 'Uri': 's3://' + bucketname + '/' + destinationfolder2 + objectname2, 117 | 'ETag': etag2, 118 | 'Size': size2 119 | } 120 | } 121 | ] 122 | ) 123 | 124 | 125 | # In[ ]: 126 | 127 | 128 | response = client.commit_transaction(TransactionId=trx_id) 129 | response 130 | 131 | 132 | # In[7]: 133 | 134 | 135 | response = client.cancel_transaction(TransactionId=trx_id) 136 | response 137 | 138 | 139 | # In[8]: 140 | 141 | 142 | response = client.describe_transaction(TransactionId=trx_id) 143 | response 144 | 145 | 146 | # In[ ]: 147 | 148 | 149 | 150 | 151 | 152 | 153 | 
###### Glue Job - jdbcdataingestion ###########

import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job

# Job parameters: catalog database (cdb), catalog table (ctbl) and the
# destination path (dest) are supplied per run by the caller.
args = getResolvedOptions(sys.argv, ["JOB_NAME","cdb","ctbl","dest"])

glueContext = GlueContext(SparkContext())
spark = glueContext.spark_session
job = Job(glueContext)
job.init(args["JOB_NAME"], args)

catalog_database = args["cdb"]
catalog_table = args["ctbl"]
destination_path = args["dest"]

# Read the catalog table into a dynamic frame.
source_frame = glueContext.create_dynamic_frame.from_catalog(
    database=catalog_database,
    table_name=catalog_table,
    redshift_tmp_dir=args["TempDir"],
    transformation_ctx="AmazonRedshift_node1651236431390",
)

# Write the frame to the destination as snappy-compressed parquet.
glueContext.write_dynamic_frame.from_options(
    frame=source_frame,
    connection_type="s3",
    format="glueparquet",
    connection_options={"path": destination_path, "partitionKeys": []},
    format_options={"compression": "snappy"},
    transformation_ctx="AmazonS3_node1651236450523",
)

job.commit()
{ 47 | "masterparallel": { 48 | "Type": "Parallel", 49 | "Branches": [ 50 | { 51 | "StartAt": "Parallel1", 52 | "States": { 53 | "Parallel1": { 54 | "Type": "Parallel", 55 | "Branches": [ 56 | { 57 | "StartAt": "IngestOrdersData", 58 | "States": { 59 | "IngestOrdersData": { 60 | "Type": "Task", 61 | "Resource": "arn:aws:states:::glue:startJobRun.sync", 62 | "Parameters": { 63 | "JobName": "jdbcdataingestion", 64 | "Arguments": { 65 | "--cdb": "dojodb", 66 | "--ctbl": "dev_public_orders", 67 | "--dest": "s3://my-demo-datalake/orders/" 68 | } 69 | }, 70 | "End": true 71 | } 72 | } 73 | }, 74 | { 75 | "StartAt": "IngestCustomersData", 76 | "States": { 77 | "IngestCustomersData": { 78 | "Type": "Task", 79 | "Resource": "arn:aws:states:::glue:startJobRun.sync", 80 | "Parameters": { 81 | "JobName": "jdbcdataingestion", 82 | "Arguments": { 83 | "--cdb": "dojodb", 84 | "--ctbl": "dev_public_customers", 85 | "--dest": "s3://my-demo-datalake/customers/" 86 | } 87 | }, 88 | "End": true 89 | } 90 | } 91 | } 92 | ], 93 | "End": true 94 | } 95 | } 96 | }, 97 | { 98 | "StartAt": "Parallel2", 99 | "States": { 100 | "Parallel2": { 101 | "Type": "Parallel", 102 | "End": true, 103 | "Branches": [ 104 | { 105 | "StartAt": "IngestCustomerProfileData", 106 | "States": { 107 | "IngestCustomerProfileData": { 108 | "Type": "Task", 109 | "Resource": "arn:aws:states:::glue:startJobRun.sync", 110 | "Parameters": { 111 | "JobName": "jdbcdataingestion", 112 | "Arguments": { 113 | "--cdb": "dojodb", 114 | "--ctbl": "dev_public_customerprofile", 115 | "--dest": "s3://my-demo-datalake/customerprofile/" 116 | } 117 | }, 118 | "End": true 119 | } 120 | } 121 | }, 122 | { 123 | "StartAt": "IngestSensorData", 124 | "States": { 125 | "IngestSensorData": { 126 | "Type": "Task", 127 | "Resource": "arn:aws:states:::glue:startJobRun.sync", 128 | "Parameters": { 129 | "JobName": "jdbcdataingestion", 130 | "Arguments": { 131 | "--cdb": "dojodb", 132 | "--ctbl": "dev_public_sensordata", 133 | "--dest": 
"s3://my-demo-datalake/sensordata/" 134 | } 135 | }, 136 | "End": true 137 | } 138 | } 139 | } 140 | ] 141 | } 142 | } 143 | } 144 | ], 145 | "End": true 146 | } 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /pipelinehandler.py: -------------------------------------------------------------------------------- 1 | Job1 2 | ==== 3 | import sys 4 | from awsglue.transforms import * 5 | from awsglue.utils import getResolvedOptions 6 | from pyspark.context import SparkContext 7 | from awsglue.context import GlueContext 8 | from awsglue.job import Job 9 | 10 | glueContext = GlueContext(SparkContext.getOrCreate()) 11 | 12 | customerDF = glueContext.create_dynamic_frame.from_catalog( 13 | database="dojodb", 14 | table_name="src_postgres_public_customers", redshift_tmp_dir="s3://dojo-dataset/scripts/") 15 | 16 | glueContext.write_dynamic_frame.from_options(customerDF, connection_type = "s3", connection_options = {"path": "s3://dojo-dataset/raw/customers"}, format = "csv") 17 | 18 | Job2 19 | ==== 20 | 21 | import sys 22 | from awsglue.transforms import * 23 | from awsglue.utils import getResolvedOptions 24 | from pyspark.context import SparkContext 25 | from awsglue.context import GlueContext 26 | from awsglue.job import Job 27 | 28 | glueContext = GlueContext(SparkContext.getOrCreate()) 29 | 30 | customersDF = glueContext.create_dynamic_frame.from_catalog( 31 | database="dojodb", 32 | table_name="raw_customers") 33 | glueContext.write_dynamic_frame.from_options(customersDF, connection_type = "s3", connection_options = {"path": "s3://dojo-dataset/cleansed/customers"}, format = "parquet") 34 | 35 | Lambda Code 36 | =========== 37 | 38 | import json 39 | import boto3 40 | 41 | def lambda_handler(event, context): 42 | # TODO implement 43 | source = "" 44 | if event["detail-type"] == "Glue Crawler State Change": 45 | source = event["detail"]["crawlerName"] 46 | print(source) 47 | 48 | if event["detail-type"] == "Glue Job 
State Change": 49 | source = event["detail"]["jobName"] 50 | print(source) 51 | 52 | ddclient = boto3.client('dynamodb') 53 | ddresp = ddclient.execute_statement(Statement= "select target, targettype from pipelineconfig where source = '" + source + "'") 54 | target = ddresp["Items"][0]["target"]["S"] 55 | targettype = ddresp["Items"][0]["targettype"]["S"] 56 | print(target) 57 | print(targettype) 58 | 59 | glueclient = boto3.client('glue') 60 | if targettype == "crawler": 61 | glueclient.start_crawler(Name=target) 62 | if targettype == "job": 63 | glueclient.start_job_run(JobName=target) 64 | 65 | return { 66 | 'statusCode': 200, 67 | 'body': json.dumps('Handler Called') 68 | } 69 | -------------------------------------------------------------------------------- /pushdownpredicate.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "193bdd80", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import sys\n", 11 | "from awsglue.transforms import *\n", 12 | "from awsglue.utils import getResolvedOptions\n", 13 | "from pyspark.context import SparkContext\n", 14 | "from awsglue.context import GlueContext\n", 15 | "\n", 16 | "glueContext = GlueContext(SparkContext.getOrCreate())" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "id": "aeca8840", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "customersdf = glueContext.create_dynamic_frame.from_catalog(\n", 27 | " database=\"dojodb\",\n", 28 | " table_name=\"customers\")" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "id": "c33b8765", 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "customersdf.count()" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "id": "a79bd4e2", 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "customersdf = 
Filter.apply(customersdf, f = lambda x: x[\"spending\"] == \"High\")" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "id": "3e7ee8b8", 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "customersdf.count()" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "id": "ded55412", 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "customersdf1 = glueContext.create_dynamic_frame.from_catalog(\n", 69 | " database=\"dojodb\",\n", 70 | " table_name=\"customers\",\n", 71 | " push_down_predicate = \"(spending == 'High')\")" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "id": "746ad0b9", 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "customersdf1.count()" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "id": "0fc7c91d", 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "customersdf1 = glueContext.create_dynamic_frame.from_catalog(\n", 92 | " database=\"dojodb\",\n", 93 | " table_name=\"customers\",\n", 94 | " push_down_predicate = \"(spending == 'High' and profession == 'Engineer')\")" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "id": "87d698da", 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "customersdf1.count()" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "id": "9dfd1dab", 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [] 114 | } 115 | ], 116 | "metadata": { 117 | "kernelspec": { 118 | "display_name": "Sparkmagic (PySpark)", 119 | "language": "python", 120 | "name": "pysparkkernel" 121 | }, 122 | "language_info": { 123 | "codemirror_mode": { 124 | "name": "python", 125 | "version": 3 126 | }, 127 | "file_extension": ".py", 128 | "mimetype": "text/x-python", 129 | "name": "pyspark", 130 | "pygments_lexer": "python3" 131 | } 132 | }, 133 | "nbformat": 4, 134 | 
"nbformat_minor": 5 135 | } 136 | -------------------------------------------------------------------------------- /redshift-data-api-etl.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-dojo/analytics/7bf14ff1faaba9c4e3f9e9706a8765c272b01208/redshift-data-api-etl.zip -------------------------------------------------------------------------------- /redshift_sql_workflow_step_functions.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-dojo/analytics/7bf14ff1faaba9c4e3f9e9706a8765c272b01208/redshift_sql_workflow_step_functions.zip -------------------------------------------------------------------------------- /stepfunctionsetl.py: -------------------------------------------------------------------------------- 1 | #job1 ***************** 2 | 3 | import sys 4 | from awsglue.transforms import * 5 | from awsglue.utils import getResolvedOptions 6 | from pyspark.context import SparkContext 7 | from awsglue.context import GlueContext 8 | from awsglue.job import Job 9 | 10 | glueContext = GlueContext(SparkContext.getOrCreate()) 11 | 12 | customerDF = glueContext.create_dynamic_frame.from_catalog( 13 | database="dojodb", 14 | table_name="src_postgres_public_customers", redshift_tmp_dir="s3://dojo-dataset/scripts/") 15 | 16 | glueContext.write_dynamic_frame.from_options(customerDF, connection_type = "s3", connection_options = {"path": "s3://dojo-dataset/raw/customers"}, format = "csv") 17 | 18 | #job2 *********************** 19 | 20 | import sys 21 | from awsglue.transforms import * 22 | from awsglue.utils import getResolvedOptions 23 | from pyspark.context import SparkContext 24 | from awsglue.context import GlueContext 25 | from awsglue.job import Job 26 | 27 | glueContext = GlueContext(SparkContext.getOrCreate()) 28 | 29 | customersDF = glueContext.create_dynamic_frame.from_catalog( 30 | database="dojodb", 31 | 
#start-crawler-lambda *****************

import json
import boto3

def lambda_handler(event, context):
    """Start the Glue crawler named in event["crawlername"].

    A crawler that is already running is treated as started rather than as
    an error, because the workflow polls the crawler state afterwards.

    Args:
        event: dict containing "crawlername".
        context: Lambda context object (unused).

    Returns:
        None.
    """
    target = event["crawlername"]
    glueclient = boto3.client('glue')
    try:
        glueclient.start_crawler(Name=target)
    except glueclient.exceptions.CrawlerRunningException:
        # Already running: the following state check will wait for it.
        print("crawler " + target + " is already running")

#get-crawler-state-lambda ***************

import json
import boto3

def lambda_handler(event, context):
    """Return the current state of the Glue crawler named in the event.

    Args:
        event: dict containing "crawlername".
        context: Lambda context object (unused).

    Returns:
        dict with a single key 'state' holding the crawler's State value.
    """
    target = event["crawlername"]
    glueclient = boto3.client('glue')
    response = glueclient.get_crawler(Name=target)
    return {
        'state': response['Crawler']['State']
    }
"crawlername": "crawler1" 103 | } 104 | }, 105 | "Retry": [ 106 | { 107 | "ErrorEquals": [ 108 | "Lambda.ServiceException", 109 | "Lambda.AWSLambdaException", 110 | "Lambda.SdkClientException" 111 | ], 112 | "IntervalSeconds": 2, 113 | "MaxAttempts": 6, 114 | "BackoffRate": 2 115 | } 116 | ], 117 | "Next": "Choice" 118 | }, 119 | "Choice": { 120 | "Type": "Choice", 121 | "Choices": [ 122 | { 123 | "Not": { 124 | "Variable": "$.state", 125 | "StringEquals": "READY" 126 | }, 127 | "Next": "Wait" 128 | } 129 | ], 130 | "Default": "job2_start" 131 | }, 132 | "Wait": { 133 | "Type": "Wait", 134 | "Seconds": 3, 135 | "Next": "get_crawler1_state" 136 | }, 137 | "job2_start": { 138 | "Type": "Task", 139 | "Resource": "arn:aws:states:::glue:startJobRun.sync", 140 | "Parameters": { 141 | "JobName": "job2" 142 | }, 143 | "Next": "crawler2_start" 144 | }, 145 | "crawler2_start": { 146 | "Type": "Task", 147 | "Resource": "arn:aws:states:::lambda:invoke", 148 | "OutputPath": "$.Payload", 149 | "Parameters": { 150 | "FunctionName": "arn:aws:lambda:eu-west-1::function:startcrawlerfunction:$LATEST", 151 | "Payload": { 152 | "crawlername": "crawler2" 153 | } 154 | }, 155 | "Retry": [ 156 | { 157 | "ErrorEquals": [ 158 | "Lambda.ServiceException", 159 | "Lambda.AWSLambdaException", 160 | "Lambda.SdkClientException" 161 | ], 162 | "IntervalSeconds": 2, 163 | "MaxAttempts": 6, 164 | "BackoffRate": 2 165 | } 166 | ], 167 | "End": true 168 | } 169 | } 170 | } 171 | 172 | 173 | --------------------------------------------------------------------------------