├── .gitignore
├── ETL Framework Template
│   ├── ETL Framework Template.sln
│   ├── ETL Framework Template.v12.suo
│   └── ETL Framework Template
│       ├── ETL Framework Template.database
│       ├── ETL Framework Template.dtproj
│       ├── ETL Framework Template.dtproj.user
│       ├── ETLTemplate.dtsx
│       ├── Project.params
│       ├── bin
│       │   └── Development
│       │       └── ETL Framework Template.ispac
│       └── obj
│           └── Development
│               ├── AgspringETLTemplate.dtsx
│               ├── BuildLog.xml
│               ├── ETL Framework Template.dtproj
│               └── Project.params
├── Framework Deployment Scripts
│   ├── 01 Create Initial Databases
│   │   ├── 01 CREATE DATABASE EDW.sql
│   │   ├── 02 CREATE DATABASE ODS.sql
│   │   ├── 03 CREATE DATABASE Reporting.sql
│   │   ├── 04 CREATE DATABASE SSISManagement.sql
│   │   └── 05 CREATE INITIAL SCHEMAS.sql
│   ├── 02 Date Dimension
│   │   ├── 01 CREATE AND POPULATE DimMonth AND DimDate.sql
│   │   ├── 02 add empty and error rows.sql
│   │   └── 03 ALTER TABLE DimDate ADD FiscalColumns.sql
│   ├── 03 Framework Objects
│   │   ├── 01 Create ETL Framework Tables.sql
│   │   ├── 02 CREATE TABLE TableLoadReport.sql
│   │   ├── 03 Create ETL Framework Stored Procs.sql
│   │   ├── 04 CREATE PROC usp_ErrorLogByPackage.sql
│   │   ├── 05 CREATE PROC usp_BatchRunTime.sql
│   │   ├── 06 CREATE PROC usp_ErrorLogByBatch.sql
│   │   ├── 07 CREATE PROC usp_GetVariableValues.sql
│   │   ├── 08 CREATE PROC PackageRunTime.sql
│   │   ├── 09 CREATE TABLE SSISConfigurations.sql
│   │   ├── 10 CREATE TABLE Tables.sql
│   │   ├── 11 CREATE TABLE LoadObservations.sql
│   │   ├── 12 CREATE VIEW RowCountLog.sql
│   │   ├── 13 CREATE VIEW ShowAdverseTableLoads.sql
│   │   ├── 14 CREATE VIEW SSISErrorLogByPackage.sql
│   │   ├── 15 CREATE VIEW SSISPackageBatchRunTime.sql
│   │   ├── 16 CREATE VIEW SSISPackageRunTime.sql
│   │   └── 17 CREATE FUNCTION udf_CleanDate.sql
│   └── 04 Python Package Installation
│       └── InstallFrameworkPackages.py
├── LICENSE
├── README.md
├── Rapid Development Tools
│   └── data model creation tool.xlsm
├── Sample Directory Structures
│   ├── Code Repo
│   │   ├── Python
│   │   │   ├── global_config
│   │   │   │   └── test_config.yaml
│   │   │   └── process_name
│   │   │       └── config
│   │   │           └── test_config.yaml
│   │   └── SQL
│   │       ├── Support Objects
│   │       │   ├── Functions
│   │       │   │   └── .gitkeep
│   │       │   └── Views
│   │       │       └── .gitkeep
│   │       └── Warehouse Load
│   │           ├── 01 Pull Data
│   │           │   └── .gitkeep
│   │           ├── 02 Clean Data
│   │           │   └── .gitkeep
│   │           ├── 03 Process Dimensions
│   │           │   └── .gitkeep
│   │           ├── 04 Process Facts
│   │           │   └── .gitkeep
│   │           ├── 05 Finalize And Audit
│   │           │   └── .gitkeep
│   │           ├── 06 Populate Reporting Tables
│   │           │   └── .gitkeep
│   │           └── 07 Monitoring
│   │               └── .gitkeep
│   ├── File Processing And Storage
│   │   └── InterfaceAndExtractFiles
│   │       ├── Business Unit Name
│   │       │   └── ETL Process Name
│   │       │       ├── Archive
│   │       │       │   └── .gitkeep
│   │       │       ├── In
│   │       │       │   └── .gitkeep
│   │       │       └── Out
│   │       │           └── .gitkeep
│   │       └── Third Party Name
│   │           └── ETL Process Name
│   │               ├── Archive
│   │               │   └── .gitkeep
│   │               ├── In
│   │               │   └── .gitkeep
│   │               └── Out
│   │                   └── .gitkeep
│   └── Warehouse File IO
│       └── FileExchange
│           ├── Business Unit Name
│           │   └── ETL Process Name
│           │       ├── In
│           │       │   └── .gitkeep
│           │       └── Out
│           │           └── .gitkeep
│           └── Third Party Name
│               └── ETL Process Name
│                   ├── In
│                   │   └── .gitkeep
│                   └── Out
│                       └── .gitkeep
└── Sample Scripts
    ├── 01 SQL
    │   ├── 01 Finalize And Audit Scripts
    │   │   ├── 18 CREATE PROC usp_RecordRowCounts.sql
    │   │   ├── 19 CREATE PROC usp_MarkRecordsAsProcessed.sql
    │   │   └── 20 CREATE PROC usp_CheckForUnprocessedRecords.sql
    │   ├── 02 Monitoring Scripts
    │   │   ├── 21 CREATE PROC usp_DisplayTablesNotLoading.sql
    │   │   ├── 22 CREATE PROC usp_LoadTableLoadReportingTable.sql
    │   │   └── 23 CREATE PROC usp_TableLoadMonitoring.sql
    │   ├── 03 Table Object Sample Scripts
    │   │   ├── sample dimension table.sql
    │   │   ├── sample fact table.sql
    │   │   ├── sample indexed view.sql
    │   │   ├── sample junk dimension table.sql
    │   │   ├── sample master data management table.sql
    │   │   └── sample stage table.sql
    │   ├── 04 Data Processing Sample Scripts
    │   │   ├── batch processing.sql
    │   │   ├── bulk insert.sql
    │   │   ├── data cleansing.sql
    │   │   ├── fact table load.sql
    │   │   ├── remove dups.sql
    │   │   ├── type I dimension processing.sql
    │   │   └── type II dimension processing.sql
    │   └── 05 Helper Scripts
    │       ├── add rowhash.sql
    │       ├── change collation.sql
    │       ├── configuration insert sample script.sql
    │       ├── documentation block.sql
    │       ├── populate fact table with fake data.sql
    │       ├── proc execution scripts.sql
    │       ├── show all columns in database.sql
    │       ├── troubleshooting.sql
    │       └── utils.sql
    ├── 02 Python
    │   ├── Building Blocks
    │   │   ├── CombineCSVsIntoOneFile.py
    │   │   ├── ConvertCommaToPipeDelimitedFile.py
    │   │   ├── ConvertExcelToCSV.py
    │   │   ├── LoopingOverFilesInADirectory.py
    │   │   ├── ProcessZipFile.py
    │   │   ├── QueryDatabaseAndWriteLargeFile.py
    │   │   ├── QueryDatabaseAndWriteSmallFile.py
    │   │   ├── SendEmail.py
    │   │   ├── StringMatching.py
    │   │   └── YAMLConfigImport.py
    │   ├── Full Solutions
    │   │   ├── DownloadMoveAndStoreDataCSV.py
    │   │   ├── LoadLargeCSVsIntoDataWarehouseStagingTables.py
    │   │   ├── MoveAndStoreDataExcel.py
    │   │   └── ReloadFromArchive.py
    │   └── Jupyter Notebooks
    │       ├── Passive Monitoring System Design Theory.ipynb
    │       └── PotentialMDMProcess.ipynb
    ├── 03 PySpark
    │   ├── ConnectToSpark.ipynb
    │   ├── ExampleDataProcessing.ipynb
    │   └── LoadCSV.ipynb
    └── 04 Windows Batch
        └── call Python from SQL Server agent.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | workspace.xml
2 | *.bak
3 | *.docx
4 | .idea/
5 | config.yaml
6 |
7 | # Byte-compiled / optimized / DLL files
8 | __pycache__/
9 | *.py[cod]
10 | *$py.class
11 |
12 | # C extensions
13 | *.so
14 |
15 | # Distribution / packaging
16 | .Python
17 | build/
18 | develop-eggs/
19 | dist/
20 | downloads/
21 | eggs/
22 | .eggs/
23 | lib/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | wheels/
29 | pip-wheel-metadata/
30 | share/python-wheels/
31 | *.egg-info/
32 | .installed.cfg
33 | *.egg
34 | MANIFEST
35 |
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 |
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 |
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .nox/
50 | .coverage
51 | .coverage.*
52 | .cache
53 | nosetests.xml
54 | coverage.xml
55 | *.cover
56 | *.py,cover
57 | .hypothesis/
58 | .pytest_cache/
59 |
60 | # Translations
61 | *.mo
62 | *.pot
63 |
64 | # Django stuff:
65 | *.log
66 | local_settings.py
67 | db.sqlite3
68 | db.sqlite3-journal
69 |
70 | # Flask stuff:
71 | instance/
72 | .webassets-cache
73 |
74 | # Scrapy stuff:
75 | .scrapy
76 |
77 | # Sphinx documentation
78 | docs/_build/
79 |
80 | # PyBuilder
81 | target/
82 |
83 | # Jupyter Notebook
84 | .ipynb_checkpoints
85 |
86 | # IPython
87 | profile_default/
88 | ipython_config.py
89 |
90 | # pyenv
91 | .python-version
92 |
93 | # pipenv
94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
97 | # install all needed dependencies.
98 | #Pipfile.lock
99 |
100 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
101 | __pypackages__/
102 |
103 | # Celery stuff
104 | celerybeat-schedule
105 | celerybeat.pid
106 |
107 | # SageMath parsed files
108 | *.sage.py
109 |
110 | # Environments
111 | .env
112 | .venv
113 | env/
114 | venv/
115 | ENV/
116 | env.bak/
117 | venv.bak/
118 |
119 | # Spyder project settings
120 | .spyderproject
121 | .spyproject
122 |
123 | # Rope project settings
124 | .ropeproject
125 |
126 | # mkdocs documentation
127 | /site
128 |
129 | # mypy
130 | .mypy_cache/
131 | .dmypy.json
132 | dmypy.json
133 |
134 | # Pyre type checker
135 | .pyre/
136 |
--------------------------------------------------------------------------------
/ETL Framework Template/ETL Framework Template.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio 2013
4 | VisualStudioVersion = 12.0.21005.1
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{159641D6-6404-4A2A-AE62-294DE0FE8301}") = "ETL Framework Template", "ETL Framework Template\ETL Framework Template.dtproj", "{20A0B72C-EC8F-41DA-BBE9-729FD188BB40}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Development|Default = Development|Default
11 | EndGlobalSection
12 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
13 | {20A0B72C-EC8F-41DA-BBE9-729FD188BB40}.Development|Default.ActiveCfg = Development
14 | {20A0B72C-EC8F-41DA-BBE9-729FD188BB40}.Development|Default.Build.0 = Development
15 | EndGlobalSection
16 | GlobalSection(SolutionProperties) = preSolution
17 | HideSolutionNode = FALSE
18 | EndGlobalSection
19 | EndGlobal
20 |
--------------------------------------------------------------------------------
/ETL Framework Template/ETL Framework Template.v12.suo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/ETL Framework Template/ETL Framework Template.v12.suo
--------------------------------------------------------------------------------
/ETL Framework Template/ETL Framework Template/ETL Framework Template.database:
--------------------------------------------------------------------------------
1 |
2 | ETL Framework Template
3 | ETL Framework Template
4 | 0001-01-01T00:00:00Z
5 | 0001-01-01T00:00:00Z
6 | 0001-01-01T00:00:00Z
7 | Unprocessed
8 | 0001-01-01T00:00:00Z
9 |
10 | Default
11 | Unchanged
12 |
13 |
--------------------------------------------------------------------------------
/ETL Framework Template/ETL Framework Template/ETL Framework Template.dtproj:
--------------------------------------------------------------------------------
1 |
2 |
3 | Project
4 | 12.0.2430.0
5 | 9.0.1.0
6 | $base64$PFNvdXJjZUNvbnRyb2xJbmZvIHhtbG5zOnhzZD0iaHR0cDovL3d3dy53My5vcmcvMjAwMS9YTUxTY2hlbWEiIHhtbG5zOnhzaT0iaHR0cDovL3d3dy53My5vcmcvMjAwMS9YTUxTY2hlbWEtaW5zdGFuY2UiIHhtbG5zOmRkbDI9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDAzL2VuZ2luZS8yIiB4bWxuczpkZGwyXzI9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDAzL2VuZ2luZS8yLzIiIHhtbG5zOmRkbDEwMF8xMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDA4L2VuZ2luZS8xMDAvMTAwIiB4bWxuczpkZGwyMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDEwL2VuZ2luZS8yMDAiIHhtbG5zOmRkbDIwMF8yMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDEwL2VuZ2luZS8yMDAvMjAwIiB4bWxuczpkZGwzMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDExL2VuZ2luZS8zMDAiIHhtbG5zOmRkbDMwMF8zMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDExL2VuZ2luZS8zMDAvMzAwIiB4bWxuczpkZGw0MDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDEyL2VuZ2luZS80MDAiIHhtbG5zOmRkbDQwMF80MDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDEyL2VuZ2luZS80MDAvNDAwIiB4bWxuczpkd2Q9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vRGF0YVdhcmVob3VzZS9EZXNpZ25lci8xLjAiPg0KICA8RW5hYmxlZD5mYWxzZTwvRW5hYmxlZD4NCiAgPFByb2plY3ROYW1lPjwvUHJvamVjdE5hbWU+DQogIDxBdXhQYXRoPjwvQXV4UGF0aD4NCiAgPExvY2FsUGF0aD48L0xvY2FsUGF0aD4NCiAgPFByb3ZpZGVyPjwvUHJvdmlkZXI+DQo8L1NvdXJjZUNvbnRyb2xJbmZvPg==
7 |
8 | ETL Framework Template.database
9 | ETL Framework Template.database
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 | {a9026253-42ad-4f4e-aa43-1ba25643a651}
18 | ETL Framework Template
19 | 1
20 | 0
21 | 0
22 |
23 |
24 | 2016-01-27T10:18:43.3891069-06:00
25 | Teletran1\Bob
26 | TELETRAN1
27 |
28 |
29 | AQAAANCMnd8BFdERjHoAwE/Cl+sBAAAAoxXkRWWSP0mQp0K92RmiqAAAAAACAAAAAAAQZgAAAAEAACAAAACK73t63Bg6yo9Ub6Fot2dL+mBlJ665CHhLzWUt2KJaqwAAAAAOgAAAAAIAACAAAAA3abbbQ6Y1z5NpX3/lTTeDtG3GIWYsZ9mfhFm+qwcc3JAAAAApeIQkmm3d95I1Nwi85t1m6zXyR6AszAaN0c/jUvFgxhjDw0gDQarrEvOUdjbhbNCHUH4wl9+PqB51jmK2Mx/N3jxIziw+aW+P3eNKdauBultxKdC/qbz9DmA4lZne5fDIiIQdTakPAWE31Oenfu/wySuzx42jxx4f5yweNHgwBApBXy1vLZ6Ys2hZfykOrjVAAAAA6UaIROxPH0CuqjQFCDtA81ZSx5R6qQS1hrcVJ1OyaV/bd2Al9m4twany8wIau6AAzEKspWNkiNXAbV30FRoXVg==
30 | 1
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 | {C479EBAA-4361-4720-8947-D43C86591D2B}
42 | ETLTemplate
43 | 1
44 | 0
45 | 76
46 |
47 |
48 | {2ABBFB1B-C134-487A-9F1E-8B0E55598978}
49 | 8
50 |
51 |
52 | 1
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 | 0
64 | 0
65 | 0
66 | Data Source=[your server];Initial Catalog=EDW;Provider=SQLNCLI11.1;Integrated Security=SSPI;Auto Translate=False;
67 | 18
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 | 0
79 | 0
80 | 0
81 | EDW
82 | 18
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 | 0
94 | 0
95 | 1
96 | 18
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 | 0
108 | 0
109 | 0
110 | false
111 | 3
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 | 0
123 | 0
124 | 0
125 | [your server]
126 | 18
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 | 0
138 | 0
139 | 0
140 | 18
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 | 0
152 | 0
153 | 0
154 | Data Source=[your server];Initial Catalog=ODS;Provider=SQLNCLI11.1;Integrated Security=SSPI;Auto Translate=False;
155 | 18
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 | 0
167 | 0
168 | 0
169 | ODS
170 | 18
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 | 0
182 | 0
183 | 1
184 | 18
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 | 0
196 | 0
197 | 0
198 | false
199 | 3
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 | 0
211 | 0
212 | 0
213 | [your server]
214 | 18
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 | 0
226 | 0
227 | 0
228 | 18
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 | 0
240 | 0
241 | 0
242 | Data Source=AgspringDW;Initial Catalog=SSISManagement;Provider=SQLNCLI11.1;Integrated Security=SSPI;Auto Translate=False;
243 | 18
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 | 0
255 | 0
256 | 0
257 | SSISManagement
258 | 18
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 | 0
270 | 0
271 | 1
272 | 18
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 | 0
284 | 0
285 | 0
286 | false
287 | 3
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 | 0
299 | 0
300 | 0
301 | AgspringDW
302 | 18
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 | 0
314 | 0
315 | 0
316 | 18
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 | Development
330 |
331 | bin
332 |
333 |
334 |
335 |
336 |
337 |
338 | LastModifiedTime
339 | LastModifiedTime
340 | 2016-03-24T20:15:25.1880445Z
341 |
342 |
343 |
344 |
345 |
346 |
--------------------------------------------------------------------------------
/ETL Framework Template/ETL Framework Template/ETL Framework Template.dtproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Development
6 |
7 | true
8 |
9 |
10 | false
11 | true
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/ETL Framework Template/ETL Framework Template/Project.params:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/ETL Framework Template/ETL Framework Template/bin/Development/ETL Framework Template.ispac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/ETL Framework Template/ETL Framework Template/bin/Development/ETL Framework Template.ispac
--------------------------------------------------------------------------------
/ETL Framework Template/ETL Framework Template/obj/Development/BuildLog.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | ETL Framework Template
5 | 2016-04-06T20:56:34.9057414Z
6 | EncryptSensitiveWithUserKey
7 |
8 |
9 |
10 | AgspringETLTemplate.dtsx
11 | 2016-04-06T21:17:31.1938222Z
12 | EncryptSensitiveWithUserKey
13 |
14 |
15 |
--------------------------------------------------------------------------------
/ETL Framework Template/ETL Framework Template/obj/Development/ETL Framework Template.dtproj:
--------------------------------------------------------------------------------
1 |
2 |
3 | Project
4 | 12.0.2430.0
5 | 9.0.1.0
6 | $base64$PFNvdXJjZUNvbnRyb2xJbmZvIHhtbG5zOnhzZD0iaHR0cDovL3d3dy53My5vcmcvMjAwMS9YTUxTY2hlbWEiIHhtbG5zOnhzaT0iaHR0cDovL3d3dy53My5vcmcvMjAwMS9YTUxTY2hlbWEtaW5zdGFuY2UiIHhtbG5zOmRkbDI9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDAzL2VuZ2luZS8yIiB4bWxuczpkZGwyXzI9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDAzL2VuZ2luZS8yLzIiIHhtbG5zOmRkbDEwMF8xMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDA4L2VuZ2luZS8xMDAvMTAwIiB4bWxuczpkZGwyMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDEwL2VuZ2luZS8yMDAiIHhtbG5zOmRkbDIwMF8yMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDEwL2VuZ2luZS8yMDAvMjAwIiB4bWxuczpkZGwzMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDExL2VuZ2luZS8zMDAiIHhtbG5zOmRkbDMwMF8zMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDExL2VuZ2luZS8zMDAvMzAwIiB4bWxuczpkZGw0MDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDEyL2VuZ2luZS80MDAiIHhtbG5zOmRkbDQwMF80MDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDEyL2VuZ2luZS80MDAvNDAwIiB4bWxuczpkd2Q9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vRGF0YVdhcmVob3VzZS9EZXNpZ25lci8xLjAiPg0KICA8RW5hYmxlZD5mYWxzZTwvRW5hYmxlZD4NCiAgPFByb2plY3ROYW1lPjwvUHJvamVjdE5hbWU+DQogIDxBdXhQYXRoPjwvQXV4UGF0aD4NCiAgPExvY2FsUGF0aD48L0xvY2FsUGF0aD4NCiAgPFByb3ZpZGVyPjwvUHJvdmlkZXI+DQo8L1NvdXJjZUNvbnRyb2xJbmZvPg==
7 |
8 | ETL Framework Template.database
9 | ETL Framework Template.database
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 | {a9026253-42ad-4f4e-aa43-1ba25643a651}
18 | ETL Framework Template
19 | 1
20 | 0
21 | 0
22 |
23 |
24 | 2016-01-27T10:18:43.3891069-06:00
25 | Teletran1\Bob
26 | TELETRAN1
27 |
28 |
29 | AQAAANCMnd8BFdERjHoAwE/Cl+sBAAAAKWVJ8+Y0R0qCCKJfAf2yYAAAAAACAAAAAAADZgAAwAAAABAAAAD5rTNyuIACjLuEANeM4VmBAAAAAASAAACgAAAAEAAAAGR7+ybDlWCudzg7LVSzRX2IAAAAoggauLnNDyTbLXJxRQ4sn6bIBQQwZvC+mIG09yeDyeywtsY/0NjGxf8VEIWZNulvNpxialsQuE7T4WNN7zsZH8VzMtLBY1HtU/CxsfU5iMrhhCHJt81tmY9FCeyX/RuOHdJGeJ0M1Q5TQMw1vyZrIWUW+ETxeC8UZpcdxrxHgvZhOp73W8z6vxQAAABhbvGUmrDBi4P4Cifs4vvXJ758rQ==
30 | 1
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 | {860DCA67-C5C3-4BB6-A243-ED8FA812EE9B}
42 | AgspringETLTemplate
43 | 1
44 | 0
45 | 64
46 |
47 |
48 | {F3692D1E-2A6D-4B0F-B8B2-495D2A146F24}
49 | 8
50 |
51 |
52 | 1
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 | 0
64 | 0
65 | 0
66 | Data Source=AgspringDW;Initial Catalog=SSISManagement;Provider=SQLNCLI11.1;Integrated Security=SSPI;Auto Translate=False;
67 | 18
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 | 0
79 | 0
80 | 0
81 | SSISManagement
82 | 18
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 | 0
94 | 0
95 | 1
96 | 18
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 | 0
108 | 0
109 | 0
110 | false
111 | 3
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 | 0
123 | 0
124 | 0
125 | AgspringDW
126 | 18
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 | 0
138 | 0
139 | 0
140 | 18
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 | Development
154 |
155 | bin
156 |
157 |
158 |
159 |
160 |
161 |
162 | LastModifiedTime
163 | LastModifiedTime
164 | 2016-03-24T20:15:25.1880445Z
165 |
166 |
167 |
168 |
169 |
170 |
--------------------------------------------------------------------------------
/ETL Framework Template/ETL Framework Template/obj/Development/Project.params:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Framework Deployment Scripts/01 Create Initial Databases/01 CREATE DATABASE EDW.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 15Oct20
4 | -- Description: Create the data warehouse database.
5 | -- Directions for use: Replace [Your Data Warehouse Database Name]
6 | -- with the name specific to your EDW, then run this script on all
7 | -- EDW-related servers.
8 | -- =============================================
9 |
10 | CREATE DATABASE [Your Data Warehouse Database Name]
11 | COLLATE Latin1_General_CS_AS
--------------------------------------------------------------------------------
/Framework Deployment Scripts/01 Create Initial Databases/02 CREATE DATABASE ODS.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 15Oct20
4 | -- Description: Create the operational data store database.
5 | -- Directions for use: Simply run this script on all
6 | -- EDW-related servers.
7 | -- =============================================
8 |
9 | CREATE DATABASE ODS
10 | COLLATE Latin1_General_CS_AS
--------------------------------------------------------------------------------
/Framework Deployment Scripts/01 Create Initial Databases/03 CREATE DATABASE Reporting.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 15Oct20
4 | -- Description: Create the Reporting database.
5 | -- Directions for use: Simply run this script on all
6 | -- EDW-related servers.
7 | -- =============================================
8 |
9 | CREATE DATABASE Reporting
10 | COLLATE Latin1_General_CS_AS
--------------------------------------------------------------------------------
/Framework Deployment Scripts/01 Create Initial Databases/04 CREATE DATABASE SSISManagement.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 15Oct20
4 | -- Description: Create the SSISManagement database.
5 | -- Directions for use: Simply run this script on all
6 | -- EDW-related servers.
7 | -- =============================================
8 |
9 | CREATE DATABASE [SSISManagement]
10 | COLLATE Latin1_General_CS_AS
--------------------------------------------------------------------------------
/Framework Deployment Scripts/01 Create Initial Databases/05 CREATE INITIAL SCHEMAS.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 15Oct20
4 | -- Description: Create the universal schemas.
5 | -- Directions for use: Replace [Your Data Warehouse Database Name]
6 | -- with the name specific to your EDW, then run this script on all
7 | -- EDW-related servers.
8 | -- =============================================
9 |
10 | USE [Your Data Warehouse Database Name]
11 |
12 | DROP SCHEMA IF EXISTS dw
13 |
14 | GO
15 |
16 | CREATE SCHEMA dw
17 |
18 | GO
19 |
20 | USE ODS
21 |
22 | DROP SCHEMA IF EXISTS cm
23 |
24 | GO
25 |
26 | CREATE SCHEMA cm
27 |
28 | GO
29 |
30 | DROP SCHEMA IF EXISTS rpt
31 |
32 | GO
33 |
34 | CREATE SCHEMA rpt
35 |
36 | GO
37 |
38 | DROP SCHEMA IF EXISTS vol
39 |
40 | GO
41 |
42 | CREATE SCHEMA vol
43 |
44 | GO
45 |
46 | DROP SCHEMA IF EXISTS mdm
47 |
48 | GO
49 |
50 | CREATE SCHEMA mdm
51 |
52 | GO
53 |
54 | DROP SCHEMA IF EXISTS ms
55 |
56 | GO
57 |
58 | CREATE SCHEMA ms
--------------------------------------------------------------------------------
/Framework Deployment Scripts/02 Date Dimension/01 CREATE AND POPULATE DimMonth AND DimDate.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Framework Deployment Scripts/02 Date Dimension/01 CREATE AND POPULATE DimMonth AND DimDate.sql
--------------------------------------------------------------------------------
/Framework Deployment Scripts/02 Date Dimension/02 add empty and error rows.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 19Oct20
4 | -- Description: Add empty and error records to date dimension
5 | -- Directions for use: Replace [YourDataWarehouseName] with the specific name of your EDW and run.
6 | -- =============================================
7 | USE [YourDataWarehouseName]
8 |
9 | INSERT INTO dw.DimDate(DateCK, FormattedDate)
10 | SELECT '00000000', 'No Date'
11 | UNION ALL
12 | SELECT '11111111', 'Not A Date'
--------------------------------------------------------------------------------
/Framework Deployment Scripts/02 Date Dimension/03 ALTER TABLE DimDate ADD FiscalColumns.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Framework Deployment Scripts/02 Date Dimension/03 ALTER TABLE DimDate ADD FiscalColumns.sql
--------------------------------------------------------------------------------
/Framework Deployment Scripts/03 Framework Objects/01 Create ETL Framework Tables.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 20Oct20
4 | -- Description: This script builds the tables that are used to log SSIS package activity.
5 | -- Directions for use: Simply run the script.
6 | -- =============================================
7 |
8 | USE [SSISManagement]
9 | GO
10 |
11 | CREATE TABLE [dbo].[Package](
12 | [PackageID] [int] IDENTITY(1,1) NOT NULL,
13 | [PackageGUID] [uniqueidentifier] NOT NULL,
14 | [PackageName] [varchar](255) NOT NULL,
15 | [CreationDate] [datetime] NOT NULL,
16 | [CreatedBy] [varchar](255) NOT NULL,
17 | [EnteredDateTime] [datetime] NOT NULL DEFAULT getdate(),
18 | CONSTRAINT [PK_Package] PRIMARY KEY CLUSTERED (
19 | [PackageID] ASC
20 | ))
21 | GO
22 |
23 | CREATE TABLE [dbo].[PackageVersion](
24 | [PackageVersionID] [int] IDENTITY(1,1) NOT NULL,
25 | [PackageVersionGUID] [uniqueidentifier] NOT NULL,
26 | [PackageID] [int] NOT NULL,
27 | [VersionMajor] [int] NOT NULL,
28 | [VersionMinor] [int] NOT NULL,
29 | [VersionBuild] [int] NOT NULL,
30 | [VersionComment] [varchar](1000) NOT NULL,
31 | [EnteredDateTime] [datetime] NOT NULL DEFAULT getdate(),
32 | CONSTRAINT [PK_PackageVersion] PRIMARY KEY CLUSTERED (
33 | [PackageVersionID] ASC
34 | ))
35 | GO
36 | ALTER TABLE [dbo].[PackageVersion] WITH CHECK ADD CONSTRAINT [FK_PackageVersion_Package] FOREIGN KEY([PackageID])
37 | REFERENCES [dbo].[Package] ([PackageID])
38 | GO
39 | ALTER TABLE [dbo].[PackageVersion] CHECK CONSTRAINT [FK_PackageVersion_Package]
40 | GO
41 |
42 | CREATE TABLE [dbo].[BatchLog](
43 | [BatchLogID] [int] IDENTITY(1,1) NOT NULL,
44 | [StartDateTime] [datetime] NOT NULL DEFAULT getdate(),
45 | [EndDateTime] [datetime] NULL,
46 | [Status] [char](1) NOT NULL,
47 | CONSTRAINT [PK_BatchLog] PRIMARY KEY CLUSTERED (
48 | [BatchLogID] ASC
49 | ))
50 | GO
51 |
52 | CREATE TABLE [dbo].[PackageLog](
53 | [PackageLogID] [int] IDENTITY(1,1) NOT NULL,
54 | [BatchLogID] [int] NOT NULL,
55 | [PackageVersionID] [int] NOT NULL,
56 | [ExecutionInstanceID] [uniqueidentifier] NOT NULL,
57 | [MachineName] [varchar](64) NOT NULL,
58 | [UserName] [varchar](64) NOT NULL,
59 | [StartDateTime] [datetime] NOT NULL DEFAULT getdate(),
60 | [EndDateTime] [datetime] NULL,
61 | [Status] [char](1) NOT NULL,
62 | CONSTRAINT [PK_PackageLog] PRIMARY KEY CLUSTERED (
63 | [PackageLogID] ASC
64 | ))
65 | GO
66 | ALTER TABLE [dbo].[PackageLog] WITH CHECK ADD CONSTRAINT [FK_PackageLog_BatchLog] FOREIGN KEY([BatchLogID])
67 | REFERENCES [dbo].[BatchLog] ([BatchLogID])
68 | GO
69 | ALTER TABLE [dbo].[PackageLog] CHECK CONSTRAINT [FK_PackageLog_BatchLog]
70 | GO
71 | ALTER TABLE [dbo].[PackageLog] WITH CHECK ADD CONSTRAINT [FK_PackageLog_PackageVersion] FOREIGN KEY([PackageVersionID])
72 | REFERENCES [dbo].[PackageVersion] ([PackageVersionID])
73 | GO
74 | ALTER TABLE [dbo].[PackageLog] CHECK CONSTRAINT [FK_PackageLog_PackageVersion]
75 | GO
76 |
77 | CREATE TABLE [dbo].[PackageErrorLog](
78 | [PackageErrorLogID] [int] IDENTITY(1,1) NOT NULL,
79 | [PackageLogID] [int] NOT NULL,
80 | [SourceName] [varchar](64) NOT NULL,
81 | [SourceID] [uniqueidentifier] NOT NULL,
82 | [ErrorCode] [int] NULL,
83 | [ErrorDescription] [varchar](2000) NULL,
84 | [LogDateTime] [datetime] NOT NULL,
85 | CONSTRAINT [PK_PackageErrorLog] PRIMARY KEY CLUSTERED (
86 | [PackageErrorLogID] ASC
87 | ))
88 | GO
89 | ALTER TABLE [dbo].[PackageErrorLog] WITH CHECK ADD CONSTRAINT [FK_PackageErrorLog_PackageLog] FOREIGN KEY([PackageLogID])
90 | REFERENCES [dbo].[PackageLog] ([PackageLogID])
91 | GO
92 | ALTER TABLE [dbo].[PackageErrorLog] CHECK CONSTRAINT [FK_PackageErrorLog_PackageLog]
93 | GO
94 |
95 | CREATE TABLE [dbo].[PackageTaskLog](
96 | [PackageTaskLogID] [int] IDENTITY(1,1) NOT NULL,
97 | [PackageLogID] [int] NOT NULL,
98 | [SourceName] [varchar](255) NOT NULL,
99 | [SourceID] [uniqueidentifier] NOT NULL,
100 | [StartDateTime] [datetime] NOT NULL,
101 | [EndDateTime] [datetime] NULL,
102 | CONSTRAINT [PK_PackageTaskLog] PRIMARY KEY CLUSTERED (
103 | [PackageTaskLogID] ASC
104 | ))
105 | GO
106 | ALTER TABLE [dbo].[PackageTaskLog] WITH CHECK ADD CONSTRAINT [FK_PackageTaskLog_PackageLog] FOREIGN KEY([PackageLogID])
107 | REFERENCES [dbo].[PackageLog] ([PackageLogID])
108 | GO
109 | ALTER TABLE [dbo].[PackageTaskLog] CHECK CONSTRAINT [FK_PackageTaskLog_PackageLog]
110 | GO
111 |
112 | CREATE TABLE [dbo].[PackageVariableLog](
113 | [PackageVariableLogID] [int] IDENTITY(1,1) NOT NULL,
114 | [PackageLogID] [int] NOT NULL,
115 | [VariableName] [varchar](255) NOT NULL,
116 | [VariableValue] [varchar](max) NOT NULL,
117 | [LogDateTime] [datetime] NOT NULL,
118 | CONSTRAINT [PK_PackageVariableLog] PRIMARY KEY CLUSTERED (
119 | [PackageVariableLogID] ASC
120 | ))
121 | GO
122 | ALTER TABLE [dbo].[PackageVariableLog] WITH CHECK ADD CONSTRAINT [FK_PackageVariableLog_PackageLog] FOREIGN KEY([PackageLogID])
123 | REFERENCES [dbo].[PackageLog] ([PackageLogID])
124 | GO
125 | ALTER TABLE [dbo].[PackageVariableLog] CHECK CONSTRAINT [FK_PackageVariableLog_PackageLog]
126 | GO
127 |
--------------------------------------------------------------------------------
/Framework Deployment Scripts/03 Framework Objects/02 CREATE TABLE TableLoadReport.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 20Oct20
4 | -- Description: This builds the table that holds volumetric data for table loads.
5 | -- Directions for use: Simply run the script.
6 | -- =============================================
7 |
8 | USE [ODS]
9 | GO
10 |
11 |
12 | DROP TABLE IF EXISTS [rpt].[TableLoadReport]
13 | GO
14 |
15 | SET ANSI_NULLS ON
16 | GO
17 |
18 | SET QUOTED_IDENTIFIER ON
19 | GO
20 |
21 | IF NOT EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[rpt].[TableLoadReport]') AND type in (N'U'))
22 | BEGIN
23 | CREATE TABLE [rpt].[TableLoadReport](
24 | [TableLoadReportID] [bigint] IDENTITY(1,1) NOT NULL,
25 | [TableName] [nvarchar](50) NULL,
26 | [DateOfCurrentObservation] [datetime] NULL,
27 | [DateOfLastObservation] [datetime] NULL,
28 | [RowCount] [int] NULL,
29 | [ChangeFromLastObservation] [int] NULL,
30 | [CurrentThreeSDLevel] [numeric](18, 4) NULL,
31 | [AsOf] [date] NULL,
32 | CONSTRAINT [PK_TableLoadReport] PRIMARY KEY CLUSTERED
33 | (
34 | [TableLoadReportID] ASC
35 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
36 | ) ON [PRIMARY]
37 | END
38 | GO
39 |
40 |
41 |
--------------------------------------------------------------------------------
/Framework Deployment Scripts/03 Framework Objects/03 Create ETL Framework Stored Procs.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 20Oct20
4 | -- Description: These are the stored procedures that SSIS uses to log activity.
5 | -- Note that they do not conform to the naming convention for stored procedures.
6 | -- Directions for use: Simply run the script.
7 | -- =============================================
8 |
9 | USE [SSISManagement]
10 | GO
11 |
12 | DROP PROCEDURE IF EXISTS [dbo].[LogPackageStart]
13 | DROP PROCEDURE IF EXISTS [dbo].[LogPackageEnd]
14 | DROP PROCEDURE IF EXISTS [dbo].[LogPackageError]
15 | DROP PROCEDURE IF EXISTS [dbo].[LogTaskPreExecute]
16 | DROP PROCEDURE IF EXISTS [dbo].[LogTaskPostExecute]
17 | DROP PROCEDURE IF EXISTS [dbo].[LogVariableValueChanged]
18 | GO
19 |
20 | CREATE PROCEDURE [dbo].[LogPackageStart]
21 | ( @BatchLogID int
22 | ,@PackageName varchar(255)
23 | ,@ExecutionInstanceID uniqueidentifier
24 | ,@MachineName varchar(64)
25 | ,@UserName varchar(64)
26 | ,@StartDateTime datetime
27 | ,@PackageVersionGUID uniqueidentifier
28 | ,@VersionMajor int
29 | ,@VersionMinor int
30 | ,@VersionBuild int
31 | ,@VersionComment varchar(1000)
32 | ,@PackageGUID uniqueidentifier
33 | ,@CreationDate datetime
34 | ,@CreatedBy varchar(255)
35 | )
36 |
37 | AS
38 | BEGIN
39 | SET NOCOUNT ON
40 |
41 | DECLARE @PackageID int
42 | ,@PackageVersionID int
43 | ,@PackageLogID int
44 | ,@EndBatchAudit bit
45 |
46 | /* Initialize Variables */
47 | SELECT @EndBatchAudit = 0
48 |
49 | /* Get Package Metadata ID */
50 | IF NOT EXISTS (SELECT 1 FROM dbo.Package WHERE PackageGUID = @PackageGUID AND PackageName = @PackageName)
51 | Begin
52 | INSERT INTO dbo.Package (PackageGUID, PackageName, CreationDate, CreatedBy)
53 | VALUES (@PackageGUID, @PackageName, @CreationDate, @CreatedBy)
54 | End
55 |
56 | SELECT @PackageID = PackageID
57 | FROM dbo.Package
58 | WHERE PackageGUID = @PackageGUID
59 | AND PackageName = @PackageName
60 |
61 | /* Get Package Version MetaData ID */
62 | IF NOT EXISTS (SELECT 1 FROM dbo.PackageVersion WHERE PackageVersionGUID = @PackageVersionGUID)
63 | Begin
64 | INSERT INTO dbo.PackageVersion (PackageID, PackageVersionGUID, VersionMajor, VersionMinor, VersionBuild, VersionComment)
65 | VALUES (@PackageID, @PackageVersionGUID, @VersionMajor, @VersionMinor, @VersionBuild, @VersionComment)
66 | End
67 | SELECT @PackageVersionID = PackageVersionID
68 | FROM dbo.PackageVersion
69 | WHERE PackageVersionGUID = @PackageVersionGUID
70 |
71 | /* Get BatchLogID */
72 | IF ISNULL(@BatchLogID,0) = 0
73 | Begin
74 | INSERT INTO dbo.BatchLog (StartDateTime, [Status])
75 | VALUES (@StartDateTime, 'R')
76 | SELECT @BatchLogID = SCOPE_IDENTITY()
77 | SELECT @EndBatchAudit = 1
78 | End
79 |
80 | /* Create PackageLog Record */
81 | INSERT INTO dbo.PackageLog (BatchLogID, PackageVersionID, ExecutionInstanceID, MachineName, UserName, StartDateTime, [Status])
82 | VALUES(@BatchLogID, @PackageVersionID, @ExecutionInstanceID, @MachineName, @UserName, @StartDateTime, 'R')
83 |
84 | SELECT @PackageLogID = SCOPE_IDENTITY()
85 |
86 | SELECT @BatchLogID as BatchLogID, @PackageLogID as PackageLogID, @EndBatchAudit as EndBatchAudit
87 |
88 | END
89 | GO
90 |
91 | CREATE PROCEDURE [dbo].[LogPackageEnd]
92 | ( @PackageLogID int
93 | ,@BatchLogID int
94 | ,@EndBatchAudit bit
95 | )
96 |
97 | AS
98 | BEGIN
99 | SET NOCOUNT ON
100 | UPDATE dbo.PackageLog
101 | SET Status = 'S'
102 | , EndDateTime = getdate()
103 | WHERE PackageLogID = @PackageLogID
104 |
105 | IF @EndBatchAudit = 1
106 | Begin
107 | UPDATE dbo.BatchLog
108 | SET Status = 'S'
109 | , EndDateTime = getdate()
110 | WHERE BatchLogID = @BatchLogID
111 | End
112 | END
113 | GO
114 |
115 | CREATE PROCEDURE [dbo].[LogPackageError]
116 | ( @PackageLogID int
117 | ,@BatchLogID int
118 | ,@SourceName varchar(64)
119 | ,@SourceID uniqueidentifier
120 | ,@ErrorCode int
121 | ,@ErrorDescription varchar(2000)
122 | ,@EndBatchAudit bit
123 | )
124 |
125 | AS
126 | BEGIN
127 | SET NOCOUNT ON
128 | INSERT INTO dbo.PackageErrorLog (PackageLogID, SourceName, SourceID, ErrorCode, ErrorDescription, LogDateTime)
129 | VALUES (@PackageLogID, @SourceName, @SourceID, @ErrorCode, @ErrorDescription, getdate())
130 |
131 | UPDATE dbo.PackageLog
132 | SET Status = 'F'
133 | WHERE PackageLogID = @PackageLogID
134 |
135 | IF @EndBatchAudit = 1
136 | Begin
137 | UPDATE dbo.BatchLog
138 | SET Status = 'F'
139 | , EndDateTime = getdate()
140 | WHERE BatchLogID = @BatchLogID
141 | End
142 | END
143 | GO
144 |
145 | CREATE PROCEDURE [dbo].[LogTaskPreExecute]
146 | ( @PackageLogID int
147 | ,@SourceName varchar(64)
148 | ,@SourceID uniqueidentifier
149 | ,@PackageID uniqueidentifier
150 | )
151 |
152 | AS
153 | BEGIN
154 | SET NOCOUNT ON
155 | IF @PackageID <> @SourceID
156 | AND @SourceName <> 'SQL LogPackageStart'
157 | AND @SourceName <> 'SQL LogPackageEnd'
158 | INSERT INTO dbo.PackageTaskLog (PackageLogID, SourceName, SourceID, StartDateTime)
159 | VALUES (@PackageLogID, @SourceName, @SourceID, getdate())
160 | END
161 | GO
162 |
163 | CREATE PROCEDURE [dbo].[LogTaskPostExecute]
164 | ( @PackageLogID int
165 | ,@SourceID uniqueidentifier
166 | ,@PackageID uniqueidentifier
167 | )
168 |
169 | AS
170 | BEGIN
171 | SET NOCOUNT ON
172 | IF @PackageID <> @SourceID
173 | UPDATE dbo.PackageTaskLog
174 | SET EndDateTime = getdate()
175 | WHERE PackageLogID = @PackageLogID AND SourceID = @SourceID
176 | AND EndDateTime is null
177 | END
178 | GO
179 |
180 | CREATE PROCEDURE [dbo].[LogVariableValueChanged]
181 | ( @PackageLogID int
182 | ,@VariableName varchar(255)
183 | ,@VariableValue varchar(max)
184 | )
185 | AS
186 | BEGIN
187 | SET NOCOUNT ON
188 | INSERT INTO dbo.PackageVariableLog(PackageLogID, VariableName, VariableValue, LogDateTime)
189 | VALUES (@PackageLogID, @VariableName, @VariableValue, getdate())
190 | END
191 | GO
192 |
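193 | -- Hedged usage sketch, not part of the deployed framework: these procs are
194 | -- normally wired into SSIS package event handlers via Execute SQL Tasks,
195 | -- roughly of the form
196 | --
197 | --   EXEC dbo.LogPackageStart ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?
198 | --
199 | -- with the parameter markers mapped to SSIS system variables such as
200 | -- System::PackageName, System::ExecutionInstanceGUID, System::MachineName,
201 | -- and System::UserName. LogPackageEnd and LogPackageError are called the
202 | -- same way from the OnPostExecute and OnError handlers respectively.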
--------------------------------------------------------------------------------
/Framework Deployment Scripts/03 Framework Objects/04 CREATE PROC usp_ErrorLogByPackage.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 28May14
4 | -- Description: This allows you to see the errors logged for a
5 | -- specific package by passing in the package name as a parameter.
6 | -- Directions for use: Pass in a package name without the extension.
7 | -- =============================================
8 |
9 | USE SSISManagement
10 |
11 |
12 | IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'usp_ErrorLogByPackage') AND type in (N'P', N'PC'))
13 | DROP PROCEDURE IF EXISTS usp_ErrorLogByPackage
14 | GO
15 |
16 | CREATE PROCEDURE usp_ErrorLogByPackage (@PackageName VARCHAR(MAX))
17 | AS
18 | BEGIN
19 | SET NOCOUNT ON
20 |
21 |
22 | SELECT BL.BatchLogID AS [Batch Log ID],
23 | P.PackageName AS [Package Name],
24 | PEL.SourceName AS [Task Name],
25 | PEL.ErrorDescription AS [Error Description],
26 | PEL.LogDateTime AS [Log Date Time]
27 | FROM PackageErrorLog PEL
28 | JOIN PackageLog PL
29 | ON PEL.PackageLogID = PL.PackageLogID
30 | JOIN PackageVersion PV
31 | ON PL.PackageVersionID = PV.PackageVersionID
32 | JOIN Package P
33 | ON PV.PackageID = P.PackageID
34 | JOIN BatchLog BL
35 | ON PL.BatchLogID = BL.BatchLogID
36 | WHERE P.PackageName = @PackageName
37 | ORDER BY PEL.LogDateTime Desc
38 |
39 | END
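40 |
41 | -- Hedged usage sketch ('ETLTemplate' is an illustrative package name):
42 | -- EXEC dbo.usp_ErrorLogByPackage @PackageName = 'ETLTemplate'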
--------------------------------------------------------------------------------
/Framework Deployment Scripts/03 Framework Objects/05 CREATE PROC usp_BatchRunTime.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 28May14
4 | -- Description: Lets you know how long a batch ran for.
5 | -- Directions for use: Pass in a batch ID.
6 | -- =============================================
7 |
8 | USE SSISManagement
9 |
10 |
11 | IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'usp_BatchRunTime') AND type in (N'P', N'PC'))
12 | DROP PROCEDURE IF EXISTS usp_BatchRunTime
13 | GO
14 |
15 | CREATE PROCEDURE usp_BatchRunTime (@BatchID INT)
16 | AS
17 | BEGIN
18 | SET NOCOUNT ON
19 |
20 |
21 | SELECT
22 | BL.BatchLogID AS [Batch ID],
23 | CONVERT(TIME,BL.EndDateTime - BL.StartDateTime) AS [Run Time],
24 | [BL].[Status]
25 | FROM BatchLog BL
26 |
27 | WHERE BL.BatchLogID = @BatchID
28 |
29 | END
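30 |
31 | -- Hedged usage sketch (batch ID 1 is illustrative):
32 | -- EXEC dbo.usp_BatchRunTime @BatchID = 1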
--------------------------------------------------------------------------------
/Framework Deployment Scripts/03 Framework Objects/06 CREATE PROC usp_ErrorLogByBatch.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 28May14
4 | -- Description: Error log by batch
5 | -- Directions for use: Pass in a batch ID.
6 | -- =============================================
7 |
8 | USE SSISManagement
9 |
10 |
11 | IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'usp_ErrorLogByBatch') AND type in (N'P', N'PC'))
12 | DROP PROCEDURE IF EXISTS usp_ErrorLogByBatch
13 | GO
14 |
15 | CREATE PROCEDURE usp_ErrorLogByBatch (@BatchID INT)
16 | AS
17 | BEGIN
18 | SET NOCOUNT ON
19 |
20 |
21 | SELECT BL.BatchLogID AS [Batch Log ID],
22 | P.PackageName AS [Package Name],
23 | PEL.SourceName AS [Task Name],
24 | PEL.ErrorDescription AS [Error Description],
25 | PEL.LogDateTime AS [Log Date Time]
26 | FROM PackageErrorLog PEL
27 | JOIN PackageLog PL
28 | ON PEL.PackageLogID = PL.PackageLogID
29 | JOIN PackageVersion PV
30 | ON PL.PackageVersionID = PV.PackageVersionID
31 | JOIN Package P
32 | ON PV.PackageID = P.PackageID
33 | JOIN BatchLog BL
34 | ON PL.BatchLogID = BL.BatchLogID
35 | WHERE BL.BatchLogID = @BatchID
36 | ORDER BY PEL.LogDateTime Desc
37 |
38 | END
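39 |
40 | -- Hedged usage sketch (batch ID 1 is illustrative):
41 | -- EXEC dbo.usp_ErrorLogByBatch @BatchID = 1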
--------------------------------------------------------------------------------
/Framework Deployment Scripts/03 Framework Objects/07 CREATE PROC usp_GetVariableValues.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 28May14
4 | -- Description: Find the variables and their values
5 | -- that are currently being used for package configuration.
6 | -- Directions for use: Pass in a package name without the file extension.
7 | -- =============================================
8 |
9 | USE SSISManagement
10 | GO
11 |
12 |
13 | SET ANSI_NULLS ON
14 | GO
15 |
16 | SET QUOTED_IDENTIFIER ON
17 | GO
18 |
19 | DROP PROCEDURE IF EXISTS usp_GetVariableValues
20 |
21 | GO
22 |
23 | CREATE PROC usp_GetVariableValues @packageName NVARCHAR(255)
24 | AS
25 | SELECT VariableName, VariableValue
26 | FROM SSISConfigurations
27 | WHERE PackageName = @packageName
28 |
29 |
30 |
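31 | -- Hedged usage sketch ('ETLTemplate' is an illustrative package name):
32 | -- EXEC dbo.usp_GetVariableValues @packageName = N'ETLTemplate'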
--------------------------------------------------------------------------------
/Framework Deployment Scripts/03 Framework Objects/08 CREATE PROC PackageRunTime.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 28May14
4 | -- Description: Lets you know how long a package ran for.
5 | -- Directions for use: Pass in a package name without the file extension.
6 | -- =============================================
7 |
8 | USE SSISManagement
9 |
10 |
11 | IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'usp_PackageRunTime') AND type in (N'P', N'PC'))
12 | DROP PROCEDURE IF EXISTS usp_PackageRunTime
13 | GO
14 |
15 | CREATE PROCEDURE usp_PackageRunTime (@PackageName VARCHAR(MAX))
16 | AS
17 | BEGIN
18 | SET NOCOUNT ON
19 |
20 |
21 | SELECT
22 | pkg.PackageName,
23 | pkglog.StartDateTime AS [Package Start Time],
24 | CONVERT(TIME,pkglog.EndDateTime - pkglog.StartDateTime) AS [Run Time],
25 | CASE pkglog.Status
26 | WHEN 'S' THEN 'Success'
27 | WHEN 'F' THEN 'Fail'
28 | WHEN 'R' THEN 'Running'
29 | END AS [Package Status]
30 | FROM dbo.PackageLog pkglog
31 | JOIN dbo.PackageVersion pkgvers
32 | ON pkglog.PackageVersionID = pkgvers.PackageVersionID
33 | JOIN dbo.Package pkg
34 | ON pkgvers.PackageID = pkg.PackageID
35 | WHERE pkg.PackageName = @PackageName
36 | ORDER BY pkglog.StartDateTime DESC
37 |
38 | END
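39 |
40 | -- Hedged usage sketch ('ETLTemplate' is an illustrative package name):
41 | -- EXEC dbo.usp_PackageRunTime @PackageName = 'ETLTemplate'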
--------------------------------------------------------------------------------
/Framework Deployment Scripts/03 Framework Objects/09 CREATE TABLE SSISConfigurations.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 20Oct20
4 | -- Description: Creates the table that holds the configuration values for SSIS packages.
5 | -- Directions for use: Simply run the script.
6 | -- =============================================
7 |
8 | USE [SSISManagement]
9 | GO
10 |
11 |
12 |
13 | DROP TABLE IF EXISTS [dbo].[SSISConfigurations]
14 | GO
15 |
16 |
17 | SET ANSI_NULLS ON
18 | GO
19 |
20 | SET QUOTED_IDENTIFIER ON
21 | GO
22 |
23 | IF NOT EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[SSISConfigurations]') AND type in (N'U'))
24 | BEGIN
25 | CREATE TABLE [dbo].[SSISConfigurations](
26 | [ProcessConfigurationID] [int] IDENTITY(1,1) NOT NULL,
27 | [PackageName] [nvarchar](255) NOT NULL,
28 | [VariableName] [nvarchar](255) NOT NULL,
29 | [VariableValue] [nvarchar](255) NOT NULL,
30 | CONSTRAINT [PK_ProcessConfiguration] PRIMARY KEY CLUSTERED
31 | (
32 | [ProcessConfigurationID] ASC
33 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
34 | ) ON [PRIMARY]
35 | END
36 | GO
37 |
38 |
39 |
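40 | -- Hedged usage sketch (package and variable names below are illustrative).
41 | -- Rows inserted here are read back at package start via usp_GetVariableValues:
42 | -- INSERT INTO dbo.SSISConfigurations (PackageName, VariableName, VariableValue)
43 | -- VALUES (N'ETLTemplate', N'SourceDirectory', N'C:\InterfaceAndExtractFiles\In')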
--------------------------------------------------------------------------------
/Framework Deployment Scripts/03 Framework Objects/10 CREATE TABLE Tables.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 7Dec20
4 | -- Description: Creates Tables table.
5 | -- Change Log:
6 | -- =============================================
7 |
8 | USE [ODS]
9 | GO
10 |
11 |
12 | DROP TABLE IF EXISTS [vol].[Tables]
13 | GO
14 |
15 |
16 | SET ANSI_NULLS ON
17 | GO
18 |
19 | SET QUOTED_IDENTIFIER ON
20 | GO
21 |
22 | IF NOT EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[vol].[Tables]') AND type in (N'U'))
23 | BEGIN
24 | CREATE TABLE [vol].[Tables](
25 | [TableID] [bigint] IDENTITY(1,1) NOT NULL,
26 | [TableName] [nvarchar](50) NOT NULL,
27 | [DateCreated] [datetime] NOT NULL,
28 | CONSTRAINT [PK_Tables] PRIMARY KEY CLUSTERED
29 | (
30 | [TableID] ASC
31 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
32 | ) ON [PRIMARY]
33 | END
34 | GO
35 |
36 |
37 |
--------------------------------------------------------------------------------
/Framework Deployment Scripts/03 Framework Objects/11 CREATE TABLE LoadObservations.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 7Dec20
4 | -- Description: Creates the load observations table.
5 | -- Change Log:
6 | -- 15Oct20 Created doc block sample. - BW
7 | -- =============================================
8 |
9 | USE [ODS]
10 | GO
11 |
12 | IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[vol].[LoadObservations]') AND type in (N'U'))
13 | ALTER TABLE [vol].[LoadObservations] DROP CONSTRAINT IF EXISTS [FK_LoadObservations_Tables]
14 | GO
15 |
16 |
17 | DROP TABLE IF EXISTS [vol].[LoadObservations]
18 | GO
19 |
20 |
21 | SET ANSI_NULLS ON
22 | GO
23 |
24 | SET QUOTED_IDENTIFIER ON
25 | GO
26 |
27 | IF NOT EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[vol].[LoadObservations]') AND type in (N'U'))
28 | BEGIN
29 | CREATE TABLE [vol].[LoadObservations](
30 | [LoadObservationID] [bigint] IDENTITY(1,1) NOT NULL,
31 | [TableID] [bigint] NOT NULL,
32 | [DateOfCurrentObservation] [datetime] NULL,
33 | [DateOfLastObservation] [datetime] NULL,
34 | [RowCount] [bigint] NULL,
35 | [ChangeFromLastObservation] [bigint] NULL,
36 | [CurrentThreeSDLevel] [numeric](18, 4) NULL,
37 | CONSTRAINT [PK_LoadObservations] PRIMARY KEY CLUSTERED
38 | (
39 | [LoadObservationID] ASC
40 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
41 | ) ON [PRIMARY]
42 | END
43 | GO
44 |
45 | IF NOT EXISTS (SELECT * FROM sys.foreign_keys WHERE object_id = OBJECT_ID(N'[vol].[FK_LoadObservations_Tables]') AND parent_object_id = OBJECT_ID(N'[vol].[LoadObservations]'))
46 | ALTER TABLE [vol].[LoadObservations] WITH CHECK ADD CONSTRAINT [FK_LoadObservations_Tables] FOREIGN KEY([TableID])
47 | REFERENCES [vol].[Tables] ([TableID])
48 | GO
49 |
50 | IF EXISTS (SELECT * FROM sys.foreign_keys WHERE object_id = OBJECT_ID(N'[vol].[FK_LoadObservations_Tables]') AND parent_object_id = OBJECT_ID(N'[vol].[LoadObservations]'))
51 | ALTER TABLE [vol].[LoadObservations] CHECK CONSTRAINT [FK_LoadObservations_Tables]
52 | GO
53 |
54 |
55 |
--------------------------------------------------------------------------------
/Framework Deployment Scripts/03 Framework Objects/12 CREATE VIEW RowCountLog.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 20Oct20
4 | -- Description: A simple log of the number of records processed in each load.
5 | -- Directions for use:
6 | -- =============================================
7 |
8 | USE [ODS]
9 | GO
10 |
11 |
12 | DROP VIEW IF EXISTS [dbo].[RowCountLog]
13 | GO
14 |
15 |
16 | SET ANSI_NULLS ON
17 | GO
18 |
19 | SET QUOTED_IDENTIFIER ON
20 | GO
21 |
22 |
23 | CREATE VIEW [dbo].[RowCountLog]
24 | AS
25 |
26 | SELECT
27 | [TableName],
28 | [DateOfCurrentObservation],
29 | [DateOfLastObservation],
30 | [RowCount],
31 | [ChangeFromLastObservation],
32 | [CurrentThreeSDLevel]
33 | FROM [vol].[Tables] t
34 | JOIN [vol].[LoadObservations] lo
35 | ON t.TableID = lo.TableID
36 | GO
37 |
38 |
39 |
--------------------------------------------------------------------------------
/Framework Deployment Scripts/03 Framework Objects/13 CREATE VIEW ShowAdverseTableLoads.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 20Oct20
4 | -- Description: Show tables with anomalous data loads.
5 | -- Directions for use:
6 | -- =============================================
7 | USE [ODS]
8 | GO
9 |
10 |
11 | DROP VIEW IF EXISTS [dbo].[ShowAdverseTableLoads]
12 | GO
13 |
14 |
15 | SET ANSI_NULLS ON
16 | GO
17 |
18 | SET QUOTED_IDENTIFIER ON
19 | GO
20 |
21 |
22 |
23 | CREATE VIEW [dbo].[ShowAdverseTableLoads] AS
24 |
25 | --Find Adverse Loads
26 | SELECT t.TableID, t.TableName, lo.[RowCount] AS [Records In Table], lo.ChangeFromLastObservation AS [Records Loaded], lo.CurrentThreeSDLevel AS [Current Three Standard Deviation Level]
27 | FROM ODS.[vol].[Tables] t
28 | JOIN ODS.[vol].[LoadObservations] lo
29 | ON t.TableID = lo.TableID
30 | WHERE t.TableID NOT IN(
31 | --Get rid of tables with no records
32 | SELECT t.TableID
33 | FROM ODS.[vol].[Tables] t
34 | JOIN ODS.[vol].[LoadObservations] lo
35 | ON t.TableID = lo.TableID
36 | GROUP BY t.TableID
37 | HAVING AVG(lo.[RowCount]) = 0
38 | UNION
39 | --Get rid of tables with no load variance
40 | SELECT t.TableID
41 | FROM ODS.[vol].[Tables] t
42 | JOIN ODS.[vol].[LoadObservations] lo
43 | ON t.TableID = lo.TableID
44 | GROUP BY t.TableID
45 | HAVING AVG(lo.CurrentThreeSDLevel) = 0
46 | )
47 | AND lo.DateOfCurrentObservation = (SELECT MAX(DateOfCurrentObservation) FROM ODS.[vol].[LoadObservations])
48 | AND lo.ChangeFromLastObservation > lo.CurrentThreeSDLevel
49 |
50 |
51 | GO
52 |
53 |
54 |
--------------------------------------------------------------------------------
/Framework Deployment Scripts/03 Framework Objects/14 CREATE VIEW SSISErrorLogByPackage.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 20Oct20
4 | -- Description: SSIS Errors By Package
5 | -- Directions for use:
6 | -- =============================================
7 |
8 | USE [ODS]
9 | GO
10 |
11 |
12 | DROP VIEW IF EXISTS [dbo].[SSISErrorLogByPackage]
13 | GO
14 |
15 |
16 | SET ANSI_NULLS ON
17 | GO
18 |
19 | SET QUOTED_IDENTIFIER ON
20 | GO
21 |
22 |
23 |
24 |
25 | CREATE VIEW [dbo].[SSISErrorLogByPackage] AS
26 |
27 | SELECT BL.BatchLogID AS [Batch Log ID],
28 | P.PackageName AS [Package Name],
29 | PEL.SourceName AS [Task Name],
30 | PEL.ErrorDescription AS [Error Description],
31 | PEL.LogDateTime AS [Log Date Time]
32 | FROM [SSISManagement]..PackageErrorLog PEL
33 | JOIN [SSISManagement]..PackageLog PL
34 | ON PEL.PackageLogID = PL.PackageLogID
35 | JOIN [SSISManagement]..PackageVersion PV
36 | ON PL.PackageVersionID = PV.PackageVersionID
37 | JOIN [SSISManagement]..Package P
38 | ON PV.PackageID = P.PackageID
39 | JOIN [SSISManagement]..BatchLog BL
40 | ON PL.BatchLogID = BL.BatchLogID
41 |
42 | GO
43 |
44 |
45 |
--------------------------------------------------------------------------------
/Framework Deployment Scripts/03 Framework Objects/15 CREATE VIEW SSISPackageBatchRunTime.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 20Oct20
4 | -- Description: SSIS Run times by batch.
5 | -- Directions for use:
6 | -- =============================================
7 |
8 | USE [ODS]
9 | GO
10 |
11 |
12 | DROP VIEW IF EXISTS [dbo].[SSISPackageBatchRunTime]
13 | GO
14 |
15 |
16 | SET ANSI_NULLS ON
17 | GO
18 |
19 | SET QUOTED_IDENTIFIER ON
20 | GO
21 |
22 |
23 |
24 | CREATE VIEW [dbo].[SSISPackageBatchRunTime] AS
25 |
26 | SELECT
27 | BL.BatchLogID AS [Batch ID],
28 | CONVERT(TIME,BL.EndDateTime - BL.StartDateTime) AS [Run Time],
29 | CASE [BL].[Status]
30 | WHEN 'S' THEN 'Success'
31 | WHEN 'F' THEN 'Fail'
32 | WHEN 'R' THEN 'Running'
33 | END AS [Batch Status]
34 | FROM [SSISManagement]..BatchLog BL
35 |
36 | GO
37 |
38 |
39 |
--------------------------------------------------------------------------------
/Framework Deployment Scripts/03 Framework Objects/16 CREATE VIEW SSISPackageRunTime.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 20Oct20
4 | -- Description: See a log of package run times.
5 | -- Directions for use:
6 | -- =============================================
7 |
8 | USE [ODS]
9 | GO
10 |
11 |
12 | DROP VIEW IF EXISTS [dbo].[SSISPackageRunTime]
13 | GO
14 |
15 |
16 | SET ANSI_NULLS ON
17 | GO
18 |
19 | SET QUOTED_IDENTIFIER ON
20 | GO
21 |
22 | CREATE VIEW [dbo].[SSISPackageRunTime] AS
23 |
24 | SELECT
25 | pkg.PackageName,
26 | pkglog.StartDateTime AS [Package Start Time],
27 | CONVERT(TIME,pkglog.EndDateTime - pkglog.StartDateTime) AS [Run Time],
28 | CASE pkglog.Status
29 | WHEN 'S' THEN 'Success'
30 | WHEN 'F' THEN 'Fail'
31 | WHEN 'R' THEN 'Running'
32 | END AS [Package Status]
33 | FROM SSISManagement..PackageLog pkglog
34 | JOIN SSISManagement..PackageVersion pkgvers
35 | ON pkglog.PackageVersionID = pkgvers.PackageVersionID
36 | JOIN SSISManagement..Package pkg
37 | ON pkgvers.PackageID = pkg.PackageID
38 |
39 | GO
40 |
41 |
42 |
--------------------------------------------------------------------------------
/Framework Deployment Scripts/03 Framework Objects/17 CREATE FUNCTION udf_CleanDate.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 17Jun18
4 | -- Description: Clean dates to conform to the DimDate DateCK format.
5 | -- =============================================
6 |
7 | USE ODS
8 |
9 | GO
10 |
11 | DROP FUNCTION IF EXISTS udf_CleanDate
12 |
13 | GO
14 |
15 | CREATE FUNCTION udf_CleanDate(@DATE NVARCHAR(255))
16 |
17 | RETURNS NCHAR(8)
18 |
19 | AS
20 |
21 | BEGIN
22 |
23 | DECLARE @MinDate DATE
24 | DECLARE @MaxDate DATE
25 | DECLARE @EmptyRecordCode NCHAR(8) = '00000000' --NCHAR keeps the leading zeros
26 | DECLARE @ErrorDateCode NCHAR(8) = '11111111'
27 |
28 | --Find and replace YourDataWarehouse.dw.DimDate below with the name of your
29 | --data warehouse database and the schema that DimDate lives in.
30 |
31 | SELECT @MaxDate = CAST(MAX(DateCK) AS NCHAR(8))
32 | FROM YourDataWarehouse.dw.DimDate
33 | WHERE DateCK NOT IN (@EmptyRecordCode,@ErrorDateCode)
34 |
35 | SELECT @MinDate = CAST(MIN(DateCK) AS NCHAR(8))
36 | FROM YourDataWarehouse.dw.DimDate
37 | WHERE DateCK NOT IN (@EmptyRecordCode,@ErrorDateCode)
38 |
39 |
40 | --Set empty dates to the empty record code.
41 | --This HAS to be done before you check for
42 | --bad dates. Then set error dates to the error date code.
43 | --Finally, clean up the dates to conform to DimDateCK format.
44 |
45 | RETURN
46 | CASE
47 | WHEN @Date IS NULL THEN @EmptyRecordCode
48 | WHEN ISDATE(@Date) = 0 THEN @ErrorDateCode
49 | WHEN @Date < @MinDate THEN @ErrorDateCode
50 | WHEN @Date > @MaxDate THEN @ErrorDateCode
51 | ELSE CONVERT(VARCHAR(10),CAST(@Date AS DATE),112)
52 | END
53 |
54 |
55 | END
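56 |
57 | --Example usage, a sketch assuming DimDate covers roughly 2000-2040:
58 | --SELECT dbo.udf_CleanDate(N'17 Jun 2018') --'20180617'
59 | --SELECT dbo.udf_CleanDate(NULL)           --'00000000' (empty record code)
60 | --SELECT dbo.udf_CleanDate(N'not a date')  --'11111111' (error date code)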
--------------------------------------------------------------------------------
/Framework Deployment Scripts/04 Python Package Installation/InstallFrameworkPackages.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import subprocess
3 |
4 | FailedInstalls = {}
5 | Packages = ['fuzzywuzzy','python-Levenshtein','pyunpack','pyodbc']
6 |
7 | for package in Packages:
8 | try:
9 | subprocess.check_call([sys.executable, '-m', 'pip', 'install', package])
10 | except Exception as e:
11 | FailedInstalls[package] = e
12 |
13 | for PackageName, ErrorMessage in FailedInstalls.items():
14 | print('Package ' + PackageName + ' failed to install because ' + str(ErrorMessage))
15 |
16 |
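17 | # Optional addition: exit nonzero when any install failed so a calling
18 | # scheduler or CI job can detect the problem.
19 | if FailedInstalls:
20 |     sys.exit(1)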
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Bob Wakefield
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Mass Street Analytics Data Warehouse ETL Framework
2 | This is a framework for moving data into and out of an enterprise data warehouse (EDW). It includes everything you need to set up a self-service data analytics platform.
3 | Documentation can be found [here](https://etl-framework.massstreetuniversity.com/data-warehouse-etl-framework/).
4 |
--------------------------------------------------------------------------------
/Rapid Development Tools/data model creation tool.xlsm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Rapid Development Tools/data model creation tool.xlsm
--------------------------------------------------------------------------------
/Sample Directory Structures/Code Repo/Python/global_config/test_config.yaml:
--------------------------------------------------------------------------------
1 | dns: 'DSN=ETL;'
2 | remote_root_directory: 'C:/'
3 | from_address: 'from email address of alert system'
4 | to_address: 'to email address of alert system'
5 | password: 'smtp server password'
6 | smtp_server: 'smtp server URL'
7 | smtp_port: smtp port value
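8 |
9 | # Example values (hypothetical; note smtp_port is a bare number, not a string):
10 | # smtp_server: 'smtp.example.com'
11 | # smtp_port: 587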
--------------------------------------------------------------------------------
/Sample Directory Structures/Code Repo/Python/process_name/config/test_config.yaml:
--------------------------------------------------------------------------------
1 | process_specific_config: 'ProcessSpecificConfigValueForTesting'
--------------------------------------------------------------------------------
/Sample Directory Structures/Code Repo/SQL/Support Objects/Functions/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Code Repo/SQL/Support Objects/Functions/.gitkeep
--------------------------------------------------------------------------------
/Sample Directory Structures/Code Repo/SQL/Support Objects/Views/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Code Repo/SQL/Support Objects/Views/.gitkeep
--------------------------------------------------------------------------------
/Sample Directory Structures/Code Repo/SQL/Warehouse Load/01 Pull Data/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Code Repo/SQL/Warehouse Load/01 Pull Data/.gitkeep
--------------------------------------------------------------------------------
/Sample Directory Structures/Code Repo/SQL/Warehouse Load/02 Clean Data/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Code Repo/SQL/Warehouse Load/02 Clean Data/.gitkeep
--------------------------------------------------------------------------------
/Sample Directory Structures/Code Repo/SQL/Warehouse Load/03 Process Dimensions/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Code Repo/SQL/Warehouse Load/03 Process Dimensions/.gitkeep
--------------------------------------------------------------------------------
/Sample Directory Structures/Code Repo/SQL/Warehouse Load/04 Process Facts/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Code Repo/SQL/Warehouse Load/04 Process Facts/.gitkeep
--------------------------------------------------------------------------------
/Sample Directory Structures/Code Repo/SQL/Warehouse Load/05 Finalize And Audit/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Code Repo/SQL/Warehouse Load/05 Finalize And Audit/.gitkeep
--------------------------------------------------------------------------------
/Sample Directory Structures/Code Repo/SQL/Warehouse Load/06 Populate Reporting Tables/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Code Repo/SQL/Warehouse Load/06 Populate Reporting Tables/.gitkeep
--------------------------------------------------------------------------------
/Sample Directory Structures/Code Repo/SQL/Warehouse Load/07 Monitoring/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Code Repo/SQL/Warehouse Load/07 Monitoring/.gitkeep
--------------------------------------------------------------------------------
/Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Business Unit Name/ETL Process Name/Archive/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Business Unit Name/ETL Process Name/Archive/.gitkeep
--------------------------------------------------------------------------------
/Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Business Unit Name/ETL Process Name/In/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Business Unit Name/ETL Process Name/In/.gitkeep
--------------------------------------------------------------------------------
/Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Business Unit Name/ETL Process Name/Out/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Business Unit Name/ETL Process Name/Out/.gitkeep
--------------------------------------------------------------------------------
/Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Third Party Name/ETL Process Name/Archive/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Third Party Name/ETL Process Name/Archive/.gitkeep
--------------------------------------------------------------------------------
/Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Third Party Name/ETL Process Name/In/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Third Party Name/ETL Process Name/In/.gitkeep
--------------------------------------------------------------------------------
/Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Third Party Name/ETL Process Name/Out/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Third Party Name/ETL Process Name/Out/.gitkeep
--------------------------------------------------------------------------------
/Sample Directory Structures/Warehouse File IO/FileExchange/Business Unit Name/ETL Process Name/In/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Warehouse File IO/FileExchange/Business Unit Name/ETL Process Name/In/.gitkeep
--------------------------------------------------------------------------------
/Sample Directory Structures/Warehouse File IO/FileExchange/Business Unit Name/ETL Process Name/Out/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Warehouse File IO/FileExchange/Business Unit Name/ETL Process Name/Out/.gitkeep
--------------------------------------------------------------------------------
/Sample Directory Structures/Warehouse File IO/FileExchange/Third Party Name/ETL Process Name/In/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Warehouse File IO/FileExchange/Third Party Name/ETL Process Name/In/.gitkeep
--------------------------------------------------------------------------------
/Sample Directory Structures/Warehouse File IO/FileExchange/Third Party Name/ETL Process Name/Out/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Warehouse File IO/FileExchange/Third Party Name/ETL Process Name/Out/.gitkeep
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/01 Finalize And Audit Scripts/18 CREATE PROC usp_RecordRowCounts.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 15Oct17
4 | -- Description: Records the row count of every warehouse table at each load.
5 | -- Change Log:
6 | -- 21Aug21 Updated for use with SQL Server 2019 BW
7 | -- =============================================
8 | USE ODS
9 |
10 | DROP PROCEDURE IF EXISTS usp_RecordRowCounts
11 |
12 | GO
13 |
14 | CREATE PROCEDURE usp_RecordRowCounts AS
15 | BEGIN
16 |
17 |
18 | --
19 | BEGIN TRANSACTION
20 |
21 | DROP TABLE IF EXISTS #counts
22 | DROP TABLE IF EXISTS #DateOfLastObservations
23 |
24 |
25 | CREATE TABLE #counts(TableName nvarchar(255), [RowCount] int)
26 | CREATE TABLE #DateOfLastObservations(TableID INT, [RowCount] INT, DateOfLastObservation DATETIME)
27 |
28 | --Change [DVDRentalDW] below to the name of your EDW database.
29 | EXEC [DVDRentalDW]..sp_MSforeachtable @command1='INSERT #counts (TableName, [RowCount]) SELECT REPLACE(SUBSTRING(''?'',8,LEN(''?'')),'']'',''''), COUNT(*) FROM ?'
30 |
31 | --SELECT *
32 | --FROM #counts
33 |
34 |
35 | MERGE vol.Tables as target
36 | USING(
37 | SELECT TableName, CURRENT_TIMESTAMP AS date_measured, [RowCount]
38 | FROM #counts
39 | ) AS source
40 | ON source.TableName COLLATE DATABASE_DEFAULT = target.TableName COLLATE DATABASE_DEFAULT
41 |
42 | WHEN NOT MATCHED THEN
43 | INSERT ([TableName],[DateCreated])
44 | VALUES (source.TableName, CURRENT_TIMESTAMP);
45 |
46 |
47 | INSERT INTO #DateOfLastObservations(TableID,DateOfLastObservation)
48 | SELECT [TableID], MAX([DateOfCurrentObservation]) AS DateOfLastObservation
49 | FROM vol.LoadObservations
50 | GROUP BY [TableID]
51 |
52 | ;
53 | WITH previous_observations(DateOfLastObservation, last_RowCount, TableID)
54 | AS(
55 | SELECT lo.[DateOfCurrentObservation], lo.[RowCount], lo.TableID
56 | FROM vol.LoadObservations lo
57 | JOIN #DateOfLastObservations llo
58 | ON lo.[DateOfCurrentObservation] = llo.DateOfLastObservation
59 | AND lo.[TableID] = llo.TableID
60 | ),
61 | current_sd_level(TableID, current_three_sd_level)
62 | AS(
63 | SELECT TableID, STDEV(ChangeFromLastObservation) * 3
64 | FROM vol.LoadObservations
65 | GROUP BY TableID
66 | )
67 | INSERT vol.[LoadObservations](TableID, DateOfCurrentObservation, DateOfLastObservation, [RowCount], ChangeFromLastObservation, [CurrentThreeSDLevel])
68 | SELECT t.TableID, CURRENT_TIMESTAMP AS DateOfCurrentObservation, po.DateOfLastObservation, c.[RowCount], ABS(c.[RowCount] - po.last_RowCount), ABS(sd.current_three_sd_level)
69 | FROM #counts c
70 | JOIN vol.Tables t
71 | ON c.TableName COLLATE DATABASE_DEFAULT = t.TableName COLLATE DATABASE_DEFAULT
72 | LEFT OUTER JOIN previous_observations po
73 | ON t.TableID = po.TableID
74 | LEFT OUTER JOIN current_sd_level sd
75 | ON t.TableID = sd.TableID
76 |
77 |
78 | COMMIT TRANSACTION
79 |
80 |
81 | DROP TABLE #counts
82 | DROP TABLE #DateOfLastObservations
83 |
84 | END
85 |
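86 | --Example usage (assumes you schedule this at the end of each load cycle):
87 | --EXEC dbo.usp_RecordRowCounts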
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/01 Finalize And Audit Scripts/19 CREATE PROC usp_MarkRecordsAsProcessed.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 15Oct17
4 | -- Description: Mark records in stage tables as
5 | -- having been processed.
6 | -- Every fact table load you build will need code
7 | -- added to the usp_MarkRecordsAsProcessed stored
8 | -- procedure.
9 | -- =============================================
10 |
11 | USE ODS
12 | GO
13 |
14 | DROP PROCEDURE IF EXISTS dbo.usp_MarkRecordsAsProcessed
15 | GO
16 |
17 | SET ANSI_NULLS ON
18 | GO
19 |
20 | SET QUOTED_IDENTIFIER ON
21 | GO
22 |
23 | CREATE PROCEDURE dbo.usp_MarkRecordsAsProcessed
24 | AS
25 | BEGIN
26 |
27 |
28 | UPDATE st
29 | SET Processed = 1
30 | FROM [eod].[Assets] st
31 | JOIN FSA..DimAssets pt
32 | ON pt.RowHash = st.UniqueRows
33 | WHERE st.ErrorRecord = 0
34 |
35 | UPDATE st
36 | SET Processed = 1
37 | FROM eod.Exchanges st
38 | JOIN FSA..FactExchangeCloseData pt
39 | ON pt.UniqueDims = st.UniqueDims
40 | WHERE st.ErrorRecord = 0
41 |
42 |
43 |
44 | END
45 | GO
46 |
47 |
48 |
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/01 Finalize And Audit Scripts/20 CREATE PROC usp_CheckForUnprocessedRecords.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 15Oct17
4 | -- Description: Check Stage Tables for unprocessed records.
5 | -- =============================================
6 |
7 |
8 | USE ODS
9 | GO
10 |
11 | DROP PROCEDURE IF EXISTS dbo.usp_CheckForUnprocessedRecords
12 | GO
13 |
14 | SET ANSI_NULLS ON
15 | GO
16 |
17 | SET QUOTED_IDENTIFIER ON
18 | GO
19 |
20 | CREATE PROCEDURE dbo.usp_CheckForUnprocessedRecords
21 | AS
22 | BEGIN
23 |
24 | --Update with your operator name.
25 | DECLARE @OperatorName sysname = N'YourOperatorName';
26 |
27 | DECLARE @OperatorEmailAddress NVARCHAR(100) = (SELECT email_address FROM msdb.dbo.sysoperators WHERE [name] = @OperatorName);
28 |
29 |
30 | --Check for unprocessed records
31 | SELECT *
32 | FROM [eod].[Assets]
33 | WHERE Processed = 0
34 |
35 |
36 | IF @@RowCount > 0
37 | BEGIN
38 | EXEC msdb.dbo.sp_send_dbmail
39 | @profile_name = 'Monitoring',
40 | @recipients = @OperatorEmailAddress,
41 | @subject = 'Unprocessed Records Exist',
42 | @body = 'There are unprocessed records in the Assets staging table.' ;
43 | END
44 |
45 | SELECT *
46 | FROM [eod].[EODPrices]
47 | WHERE Processed = 0
48 |
49 | IF @@RowCount > 0
50 | BEGIN
51 | EXEC msdb.dbo.sp_send_dbmail
52 | @profile_name = 'Monitoring',
53 | @recipients = @OperatorEmailAddress,
54 | @subject = 'Unprocessed Records Exist',
55 | @body = 'There are unprocessed records in the EOD Prices staging table.' ;
56 | END
57 |
58 | SELECT *
59 | FROM [eod].[Exchanges]
60 | WHERE Processed = 0
61 |
62 |
63 | IF @@RowCount > 0
64 | BEGIN
65 | EXEC msdb.dbo.sp_send_dbmail
66 | @profile_name = 'Monitoring',
67 | @recipients = @OperatorEmailAddress,
68 | @subject = 'Unprocessed Records Exist',
69 | @body = 'There are unprocessed records in the Exchanges staging table.' ;
70 | END
71 |
72 |
73 | END
74 | GO
75 |
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/02 Monitoring Scripts/21 CREATE PROC usp_DisplayTablesNotLoading.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 11April16
4 | -- Description: Displays tables that are not loading.
5 | -- Unwanted tables are tables that, by design, do not load often.
6 | -- 10Mar17: Updated to take into account larger record counts
7 | -- Int datatype was too small.
8 | -- =============================================
9 |
10 | USE [ODS]
11 | GO
12 |
13 |
14 | DROP PROCEDURE IF EXISTS [dbo].[usp_DisplayTablesNotLoading]
15 | GO
16 |
17 | SET ANSI_NULLS ON
18 | GO
19 |
20 | SET QUOTED_IDENTIFIER ON
21 | GO
22 |
23 | CREATE PROCEDURE [dbo].[usp_DisplayTablesNotLoading] AS
24 | BEGIN
25 |
26 | CREATE TABLE #UnWantedTables(TableID BIGINT)
27 | CREATE CLUSTERED INDEX CIDX_UNWANTEDTABLES_TABLEID ON #UnWantedTables(TableID)
28 |
29 | DECLARE @MostRecentDate DATETIME
30 |
31 | SELECT @MostRecentDate = MAX(DateOfCurrentObservation) FROM [vol].[LoadObservations]
32 |
33 | INSERT INTO #UnWantedTables(TableID)
34 | --Get rid of tables with no records
35 | SELECT t.TableID
36 | FROM [vol].[Tables] t
37 | JOIN [vol].[LoadObservations] lo
38 | ON t.TableID = lo.TableID
39 | GROUP BY t.TableID
40 | HAVING AVG(CAST(lo.[RowCount] AS BIGINT)) = 0
41 | UNION
42 | --Get rid of tables with no load variance
43 | SELECT t.TableID
44 | FROM [vol].[Tables] t
45 | JOIN [vol].[LoadObservations] lo
46 | ON t.TableID = lo.TableID
47 | GROUP BY t.TableID
48 | HAVING AVG(lo.CurrentThreeSDLevel) = 0
49 |
50 | --Manually get rid of unwanted tables.
51 | --INSERT INTO #UnWantedTables(TableID)
52 | --SELECT 131
53 |
54 |
55 |
56 | SELECT TableID, [TableName]
57 | FROM [vol].[Tables]
58 | WHERE TableID IN (
59 | SELECT t.TableID
60 | FROM [vol].[Tables] t
61 | JOIN [vol].[LoadObservations] lo
62 | ON t.TableID = lo.TableID
63 | WHERE t.TableID NOT IN (SELECT TableID FROM #UnWantedTables)
64 | AND lo.DateOfCurrentObservation BETWEEN DATEADD(DD,-3,CURRENT_TIMESTAMP) AND CURRENT_TIMESTAMP
65 | GROUP BY t.TableID
66 | HAVING AVG(CAST(lo.ChangeFromLastObservation AS FLOAT)) = 0
67 | )
68 |
69 | DROP TABLE #UnWantedTables
70 |
71 | END
72 |
73 |
74 |
75 |
76 | GO
77 |
78 |
79 |
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/02 Monitoring Scripts/22 CREATE PROC usp_LoadTableLoadReportingTable.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 7April16
4 | -- Description: Loads table load reporting table
5 | -- 23Dec16: Updated to take into account larger record counts
6 | -- Int datatype was too small.
7 | -- =============================================
8 |
9 | USE [ODS]
10 | GO
11 |
12 |
13 | DROP PROCEDURE IF EXISTS [dbo].[usp_LoadTableLoadReportingTable]
14 | GO
15 |
16 |
17 | SET ANSI_NULLS ON
18 | GO
19 |
20 | SET QUOTED_IDENTIFIER ON
21 | GO
22 |
23 |
24 | CREATE PROCEDURE [dbo].[usp_LoadTableLoadReportingTable] AS
25 | BEGIN
26 |
27 | CREATE TABLE #UnWantedTables(TableID INT)
28 | CREATE CLUSTERED INDEX CIDX_UNWANTEDTABLES_TABLEID ON #UnWantedTables(TableID)
29 |
30 | DECLARE @MostRecentDate DATETIME
31 | DECLARE @MostRecentDateInReportingTable DATE
32 |
33 |
34 | SELECT @MostRecentDate = MAX(DateOfCurrentObservation) FROM [vol].[LoadObservations]
35 | SELECT @MostRecentDateInReportingTable = MAX(DateOfCurrentObservation) FROM rpt.TableLoadReport
36 |
37 | INSERT INTO #UnWantedTables(TableID)
38 | --Get rid of tables with no records
39 | SELECT t.TableID
40 | FROM [vol].[Tables] t
41 | JOIN [vol].[LoadObservations] lo
42 | ON t.TableID = lo.TableID
43 | GROUP BY t.TableID
44 | HAVING AVG(CAST(lo.[RowCount] AS BIGINT)) = 0
45 | UNION
46 | --Get rid of tables with no load variance
47 | SELECT t.TableID
48 | FROM [vol].[Tables] t
49 | JOIN [vol].[LoadObservations] lo
50 | ON t.TableID = lo.TableID
51 | GROUP BY t.TableID
52 | HAVING AVG(CAST(lo.CurrentThreeSDLevel AS BIGINT)) = 0
53 |
54 | --Remove Rarely loaded tables from load analysis
55 | --INSERT INTO #UnWantedTables(TableID)
56 | --SELECT 131
57 | --UNION ALL
58 | --SELECT 135
59 | --UNION ALL
60 | --SELECT 118
61 | --UNION ALL
62 | --SELECT 123
63 |
64 |
65 | IF ISNULL(@MostRecentDateInReportingTable,'19000101') <> CAST(CURRENT_TIMESTAMP AS DATE)
66 | BEGIN
67 | INSERT INTO rpt.TableLoadReport(
68 | TableName,
69 | DateOfCurrentObservation,
70 | DateOfLastObservation,
71 | [RowCount],
72 | ChangeFromLastObservation,
73 | CurrentThreeSDLevel,
74 | AsOf
75 | )
76 | SELECT
77 | t.TableName,
78 | lo.DateOfCurrentObservation,
79 | lo.DateOfLastObservation,
80 | lo.[RowCount],
81 | lo.ChangeFromLastObservation,
82 | lo.CurrentThreeSDLevel,
83 | CAST(CURRENT_TIMESTAMP AS DATE)
84 | FROM vol.Tables t
85 | JOIN vol.LoadObservations lo
86 | ON t.TableID = lo.TableID
87 | WHERE lo.CurrentThreeSDLevel IS NOT NULL
88 | AND t.TableID NOT IN (SELECT TableId FROM #UnWantedTables)
89 | ORDER BY t.TableName, lo.DateOfCurrentObservation ASC
90 | END
91 |
92 | DROP TABLE #UnWantedTables
93 |
94 | END
95 | GO
96 |
97 |
98 |
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/02 Monitoring Scripts/23 CREATE PROC usp_TableLoadMonitoring.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 7April16
4 | -- Description: Checks to see if anything weird is
5 | -- going on with the table loads and sends an alert
6 | -- when something strange is found.
7 | -- Script looks for unusual load amounts
8 | -- and tables not being loaded.
9 | -- 21Dec16: Updated to take into account larger record counts
10 | -- Int datatype was too small.
11 | -- =============================================
12 |
13 | USE [ODS]
14 | GO
15 |
16 |
17 | DROP PROCEDURE IF EXISTS [dbo].[usp_TableLoadMonitoring]
18 | GO
19 |
20 |
21 | SET ANSI_NULLS ON
22 | GO
23 |
24 | SET QUOTED_IDENTIFIER ON
25 | GO
26 |
27 | CREATE PROCEDURE [dbo].[usp_TableLoadMonitoring] AS
28 | BEGIN
29 |
30 | --Update with your operator name.
31 | DECLARE @OperatorName sysname = N'YourOperatorName';
32 |
33 | DECLARE @OperatorEmailAddress NVARCHAR(100) = (SELECT email_address FROM msdb.dbo.sysoperators WHERE [name] = @OperatorName);
34 |
35 | CREATE TABLE #UnWantedTables(TableID INT)
36 | CREATE CLUSTERED INDEX CIDX_UNWANTEDTABLES_TABLEID ON #UnWantedTables(TableID)
37 |
38 | DECLARE @MostRecentDate DATETIME
39 |
40 | SELECT @MostRecentDate = MAX(DateOfCurrentObservation) FROM [vol].[LoadObservations]
41 |
42 | INSERT INTO #UnWantedTables(TableID)
43 | --Get rid of tables with no records
44 | SELECT t.TableID
45 | FROM [vol].[Tables] t
46 | JOIN [vol].[LoadObservations] lo
47 | ON t.TableID = lo.TableID
48 | GROUP BY t.TableID
49 | HAVING AVG(CAST(lo.[RowCount] AS BIGINT)) = 0
50 | UNION
51 | --Get rid of tables with no load variance
52 | SELECT t.TableID
53 | FROM [vol].[Tables] t
54 | JOIN [vol].[LoadObservations] lo
55 | ON t.TableID = lo.TableID
56 | GROUP BY t.TableID
57 | HAVING AVG(lo.CurrentThreeSDLevel) = 0
58 |
59 | --Find Adverse Loads
60 | SELECT t.TableID
61 | FROM [vol].[Tables] t
62 | JOIN [vol].[LoadObservations] lo
63 | ON t.TableID = lo.TableID
64 | WHERE t.TableID NOT IN (SELECT TableID FROM #UnWantedTables)
65 | AND lo.DateOfCurrentObservation = @MostRecentDate
66 | AND lo.ChangeFromLastObservation > lo.CurrentThreeSDLevel
67 | --ORDER BY t.TableName
68 |
69 | IF @@RowCount > 0
70 | BEGIN
71 | EXEC msdb.dbo.sp_send_dbmail
72 | @profile_name = 'Monitoring',
73 | @recipients = @OperatorEmailAddress,
74 | @subject = 'Adverse Load Event',
75 | @body = 'One or more tables in the warehouse have experienced an anomalous load event. Check ShowAdverseTableLoads in the ODS database.' ;
76 | END
77 |
78 | --Remove Rarely loaded tables from 0 load analysis
79 | --INSERT INTO #UnWantedTables(TableID)
80 | --SELECT 135
81 | --UNION ALL
82 | --SELECT 131
83 | --UNION ALL
84 | --SELECT 118
85 | --UNION ALL
86 | --SELECT 123
87 |
88 |
89 |
90 | --Find tables not being loaded
91 | SELECT t.TableID
92 | FROM [vol].[Tables] t
93 | JOIN [vol].[LoadObservations] lo
94 | ON t.TableID = lo.TableID
95 | WHERE t.TableID NOT IN (SELECT TableID FROM #UnWantedTables)
96 | AND lo.DateOfCurrentObservation BETWEEN DATEADD(DD,-3,CURRENT_TIMESTAMP) AND CURRENT_TIMESTAMP
97 | GROUP BY t.TableID
98 | HAVING AVG(CAST(lo.ChangeFromLastObservation AS FLOAT)) = 0
99 |
100 |
101 | IF @@RowCount > 0
102 | BEGIN
103 | EXEC msdb.dbo.sp_send_dbmail
104 | @profile_name = 'Monitoring',
105 | @recipients = @OperatorEmailAddress,
106 | @subject = 'Adverse Load Event',
107 | @body = 'One or more tables in the warehouse have experienced three straight days of loading zero records. Run usp_DisplayTablesNotLoading in the ODS database.';
108 | END
109 |
110 | DROP TABLE #UnWantedTables
111 |
112 | END
113 | GO
114 |
115 |
116 |
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/03 Table Object Sample Scripts/sample dimension table.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author:
3 | -- Create date:
4 | -- Description:
5 | -- Directions for use: Replace YourEDW with the name of
6 | -- your specific data warehouse.
7 | -- Replace YourDimensionName with the name of your dimension.
8 | -- Then fill in your columns.
9 | -- =============================================
10 | USE [YourEDW]
11 |
12 | DROP TABLE IF EXISTS [dw].[DimYourDimensionName]
13 | GO
14 |
15 | SET ANSI_NULLS ON
16 | GO
17 |
18 | SET QUOTED_IDENTIFIER ON
19 | GO
20 |
21 | CREATE TABLE [dw].[DimYourDimensionName](
22 | [YourDimensionNameCK] [bigint] IDENTITY(1,1) NOT NULL,
23 | --your columns here.
24 | [CreatedBy] [nvarchar](50) NULL,
25 | [CreatedOn] [datetime] NULL,
26 | [UpdatedBy] [nvarchar](50) NULL,
27 | [UpdatedOn] [datetime] NULL,
28 | [SourceSystem] [nvarchar](100) NULL,
29 | [SourceSystemKey] [nvarchar](100) NULL,
30 | [EffectiveFrom] [datetime] NULL,
31 | [EffectiveTo] [datetime] NULL,
32 | [IsMostRecentRecord] [bit] NULL,
33 | CONSTRAINT [PK_YourDimensionName] PRIMARY KEY CLUSTERED
34 | (
35 | [YourDimensionNameCK] ASC
36 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
37 | ) ON [PRIMARY]
38 |
39 | GO
40 |
41 | --index is optional
42 | DROP INDEX IF EXISTS [NCIDX_DimYourDimensionName_SourceSystemKey] ON [dw].[DimYourDimensionName]
43 | GO
44 |
45 |
46 | CREATE NONCLUSTERED INDEX [NCIDX_DimYourDimensionName_SourceSystemKey] ON [dw].[DimYourDimensionName]
47 | (
48 | [SourceSystemKey] ASC
49 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
50 | GO
51 |
52 |
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/03 Table Object Sample Scripts/sample fact table.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author:
3 | -- Create date:
4 | -- Description:
5 | -- Directions for use: Add the PK columns of the dimensions
6 | -- that are attached to the fact table, then add your measures
7 | -- and degenerate dimensions. Add the dimension keys to the UniqueDims hash.
8 | -- Add unique rows if necessary.
9 | -- =============================================
10 |
11 | USE YourEDW
12 |
13 | DROP TABLE IF EXISTS [dw].[FactYourFactTableName]
14 | GO
15 |
16 |
17 | SET ANSI_NULLS ON
18 | GO
19 |
20 | SET QUOTED_IDENTIFIER ON
21 | GO
22 |
23 | SET ARITHABORT ON
24 | GO
25 |
26 | CREATE TABLE [dw].[FactYourFactTableName](
27 | [RowID] [bigint] IDENTITY(1,1) NOT NULL,
28 | --dimension keys
29 | [TransactionID] NVARCHAR(100) NULL,
30 | --your columns.
31 | [CreatedOn] [datetime] NULL,
32 | [CreatedBy] [nvarchar](50) NULL,
33 | [UpdatedOn] [datetime] NULL,
34 | [UpdatedBy] [nvarchar](50) NULL,
35 | [SourceSystem] [nvarchar](50) NULL,
36 | [UniqueDims] AS (CONVERT([varbinary](35),hashbytes('SHA1',concat(
37 | CONVERT([nvarchar](35),[DimKey1CK],(0)),
38 | CONVERT([nvarchar](35),[DimKey2CK],(0)),
39 | CONVERT([nvarchar](35),[TransactionID],(0))
40 | )),(0))) PERSISTED,
41 | --unique rows is optional
42 | [UniqueRows] AS (CONVERT([varbinary](35),hashbytes('SHA1',concat(
43 | CONVERT([nvarchar](35),[Column1],(0)),
44 | CONVERT([nvarchar](35),[Column2],(0))
45 | )),(0))) PERSISTED,
46 | CONSTRAINT [PK_FactYourFactTableName] PRIMARY KEY CLUSTERED
47 | (
48 | [RowID] ASC
49 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY],
50 | CONSTRAINT [IX_FactYourFactTableName] UNIQUE NONCLUSTERED
51 | (
52 | [UniqueDims] ASC
53 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
54 | ) ON [PRIMARY]
55 |
56 | --index is optional
57 | DROP INDEX IF EXISTS [NCIDX_FactYourFactTableName_UniqueDims] ON [dw].[FactYourFactTableName]
58 | GO
59 |
60 |
61 | CREATE NONCLUSTERED INDEX [NCIDX_FactYourFactTableName_UniqueDims] ON [dw].[FactYourFactTableName]
62 | (
63 | [UniqueDims] ASC
64 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
65 | GO
66 |
67 | --index is optional
68 | DROP INDEX IF EXISTS [NCIDX_FactYourFactTableName_UniqueRows] ON [dw].[FactYourFactTableName]
69 | GO
70 |
71 |
72 | CREATE NONCLUSTERED INDEX [NCIDX_FactYourFactTableName_UniqueRows] ON [dw].[FactYourFactTableName]
73 | (
74 | [UniqueRows] ASC
75 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
76 | GO
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/03 Table Object Sample Scripts/sample indexed view.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author:
3 | -- Create date:
4 | -- Description:
5 | -- Directions for use: Replace YourEDW with the name
6 | -- of your EDW. Drop in your select statement from your
7 | -- dimension devoid of audit columns.
8 | -- Replace YourDimensionName with your dimension
9 | -- Replace RenameCK with your renamed contrived key.
10 | -- =============================================
11 |
12 | USE YourEDW
13 |
14 | GO
15 | DROP VIEW IF EXISTS DimYourDimensionName
16 | GO
17 |
18 | CREATE VIEW DimYourDimensionName
19 |
20 | WITH SCHEMABINDING
21 | AS
22 | SELECT
23 | YourDimensionNameCK,
24 | Column1,
25 | Column2,
26 | ColumnN,
27 | IsMostRecentRecord
28 | FROM [dw].[DimYourDimensionName]
29 | GO
30 | --Create an index on the view.
31 | CREATE UNIQUE CLUSTERED INDEX CIDX_DimYourDimensionName_YourDimensionNameCK ON DimYourDimensionName(YourDimensionNameCK);
32 | GO
33 |
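34 | --Usage note (a sketch): outside Enterprise Edition the optimizer generally
35 | --matches the view's index only when the query hints it, e.g.:
36 | --SELECT YourDimensionNameCK, Column1
37 | --FROM dbo.DimYourDimensionName WITH (NOEXPAND)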
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/03 Table Object Sample Scripts/sample junk dimension table.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author:
3 | -- Create date:
4 | -- Description:
5 | -- Directions for use:
6 | -- Replace YourEDW with the name of your specific EDW.
7 | -- Insert your columns at the marked comment in the CREATE TABLE.
8 | -- Replace YourDimensionName with the name of your dimension.
9 | -- Put your columns in the row hash, changing the datatype parameter to fit each column.
10 | -- =============================================
11 |
12 | USE YourEDW
13 |
14 | DROP TABLE IF EXISTS [dw].[DimYourDimensionName]
15 | GO
16 |
17 | SET ANSI_NULLS ON
18 | GO
19 |
20 | SET QUOTED_IDENTIFIER ON
21 | GO
22 |
23 |
24 | CREATE TABLE [dw].[DimYourDimensionName](
25 | [YourDimensionNameCK] [bigint] IDENTITY(1,1) NOT NULL,
26 | --your columns.
27 | [CreatedBy] [nvarchar](50) NULL,
28 | [CreatedOn] [datetime] NULL,
29 | [UpdatedBy] [nvarchar](50) NULL,
30 | [UpdatedOn] [datetime] NULL,
31 | [SourceSystem] [nvarchar](100) NULL,
32 | [SourceSystemKey] [nvarchar](100) NULL,
33 | [EffectiveFrom] [datetime] NULL,
34 | [EffectiveTo] [datetime] NULL,
35 | [IsMostRecentRecord] [bit] NULL,
36 | [RowHash] AS (CONVERT([binary](16),hashbytes('MD5',concat(
37 | CONVERT([nvarchar](35),Column1,0),
38 | CONVERT([nvarchar](35),Column2,0),
39 | CONVERT([nvarchar](35),Column3,0),
40 | CONVERT([nvarchar](35),Column4,0),
41 | CONVERT([nvarchar](35),Column5,0),
42 | CONVERT([nvarchar](35),Column6,0))),0)) PERSISTED,
43 | CONSTRAINT [PK_YourDimensionName] PRIMARY KEY CLUSTERED
44 | (
45 | [YourDimensionNameCK] ASC
46 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
47 | ) ON [PRIMARY]
48 |
49 |
50 | --index is optional
51 | DROP INDEX IF EXISTS [NCIDX_DimYourDimensionName_RowHash] ON [dw].[DimYourDimensionName]
52 | GO
53 |
54 |
55 | CREATE NONCLUSTERED INDEX [NCIDX_DimYourDimensionName_RowHash] ON [dw].[DimYourDimensionName]
56 | (
57 | [RowHash] ASC
58 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
59 | GO
60 |
61 |
62 |
63 |
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/03 Table Object Sample Scripts/sample master data management table.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author:
3 | -- Create date:
4 | -- Description:
5 | -- Directions for use:
6 | -- 1. Replace YourDimensionName with the name of your specific master data management table.
7 | -- 2. Replace the source system key placeholder columns with your source system keys.
8 | -- 3. Replace the source system key placeholder columns with your source system keys in the row hash.
9 | -- =============================================
10 |
11 | USE ODS
12 | GO
13 |
14 | DROP INDEX IF EXISTS [NCIDX_YourDimensionNameMasterData_RowHash] ON [mdm].[YourDimensionNameMasterData]
15 | GO
16 |
17 | SET ARITHABORT ON
18 | SET CONCAT_NULL_YIELDS_NULL ON
19 | SET QUOTED_IDENTIFIER ON
20 | SET ANSI_NULLS ON
21 | SET ANSI_PADDING ON
22 | SET ANSI_WARNINGS ON
23 | SET NUMERIC_ROUNDABORT OFF
24 | GO
25 |
26 | DROP TABLE IF EXISTS [mdm].[YourDimensionNameMasterData]
27 | GO
28 |
29 |
30 | SET ANSI_NULLS ON
31 | GO
32 |
33 | SET QUOTED_IDENTIFIER ON
34 | GO
35 |
36 | CREATE TABLE [mdm].[YourDimensionNameMasterData](
37 | [YourDimensionNameEK] [bigint] IDENTITY(1000,1) NOT NULL,
38 | [SourceSystemKey1] [nvarchar](50) NOT NULL,
39 | [SourceSystemKey2] [nvarchar](50) NOT NULL,
40 | [RowHash] AS (CONVERT([binary](35),hashbytes('SHA1',concat(
41 | CONVERT([nvarchar](35),[SourceSystemKey1],(0)),
42 | CONVERT([nvarchar](35),[SourceSystemKey2],(0))
43 | )),(0))) PERSISTED,
44 | CONSTRAINT [PK_YourDimensionNameMasterData] PRIMARY KEY CLUSTERED
45 | (
46 | [YourDimensionNameEK] ASC
47 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
48 | ) ON [PRIMARY]
49 | GO
50 |
51 |
52 |
53 |
54 | CREATE UNIQUE NONCLUSTERED INDEX [NCIDX_YourDimensionNameMasterData_RowHash] ON [mdm].[YourDimensionNameMasterData]
55 | (
56 | [RowHash] ASC
57 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
58 | GO
59 |
60 |
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/03 Table Object Sample Scripts/sample stage table.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author:
3 | -- Create date:
4 | -- Description:
5 | -- Directions for use:
6 | -- Replace YourSchemaName with the schema for your stage table.
7 | -- Replace YourStageTableNameData with the name of your staging table.
8 | -- Add your columns at the marked comment in the CREATE TABLE.
9 | -- Replace DatabaseName with the name of the source system.
10 | -- =============================================
11 | USE ODS
12 |
13 | DROP TABLE IF EXISTS YourSchemaName.YourStageTableNameData
14 | GO
15 |
16 |
17 | SET ANSI_NULLS ON
18 | GO
19 |
20 | SET QUOTED_IDENTIFIER ON
21 | GO
22 |
23 |
24 | CREATE TABLE YourSchemaName.YourStageTableNameData(
25 | [ETLKey] [uniqueidentifier] NOT NULL,
26 | --your columns here.
27 | [UniqueDims] [varbinary](35) NULL,
28 | [UniqueRows] [varbinary](16) NULL,
29 | [SourceSystem] [nvarchar](255) NULL,
30 | [Cleansed] [bit] NULL,
31 | [ErrorRecord] [bit] NULL,
32 | [ErrorReason] [nvarchar](255) NULL,
33 | [Processed] [bit] NULL,
34 | [RunDate] [datetime] NULL,
35 | CONSTRAINT [PK_YourStageTableNameData] PRIMARY KEY CLUSTERED
36 | (
37 | [ETLKey] ASC
38 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
39 | ) ON [PRIMARY]
40 |
41 | GO
42 |
43 | ALTER TABLE [YourSchemaName].[YourStageTableNameData] ADD CONSTRAINT [DF_YourStageTableNameData_ETLKey] DEFAULT (newid()) FOR [ETLKey]
44 | GO
45 |
46 | ALTER TABLE [YourSchemaName].[YourStageTableNameData] ADD CONSTRAINT [DF_YourStageTableNameData_SourceSystem] DEFAULT (N'DatabaseName') FOR [SourceSystem]
47 | GO
48 |
49 | ALTER TABLE [YourSchemaName].[YourStageTableNameData] ADD CONSTRAINT [DF_YourStageTableNameData_Cleansed] DEFAULT ((0)) FOR [Cleansed]
50 | GO
51 |
52 | ALTER TABLE [YourSchemaName].[YourStageTableNameData] ADD CONSTRAINT [DF_YourStageTableNameData_ErrorRecord] DEFAULT ((0)) FOR [ErrorRecord]
53 | GO
54 |
55 | ALTER TABLE [YourSchemaName].[YourStageTableNameData] ADD CONSTRAINT [DF_YourStageTableNameData_Processed] DEFAULT ((0)) FOR [Processed]
56 | GO
57 |
58 | ALTER TABLE [YourSchemaName].[YourStageTableNameData] ADD CONSTRAINT [DF_YourStageTableNameData_RunDate] DEFAULT (getdate()) FOR [RunDate]
59 | GO
60 |
61 |
62 |
63 |
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/04 Data Processing Sample Scripts/batch processing.sql:
--------------------------------------------------------------------------------
1 | --Split a batch up into chunks using a cursor.
2 | --This method can be used for most any large table with some modifications
3 | --It could also be refined further with an @Day variable (for example)
4 | --This is for the case where you already have the data in a table
5 | --and are ready to process it. Obviously, if you have a large flat file,
6 | --you can just use BULK INSERT.
7 |
8 |
9 | DECLARE @Year INT
10 | DECLARE @Month INT
11 |
12 | DECLARE BatchingCursor CURSOR FOR
13 | SELECT DISTINCT YEAR([SomeDateField]),MONTH([SomeDateField])
14 | FROM [Sometable];
15 |
16 |
17 | OPEN BatchingCursor;
18 | FETCH NEXT FROM BatchingCursor INTO @Year, @Month;
19 | WHILE @@FETCH_STATUS = 0
20 | BEGIN
21 |
22 | --the transaction is optional
23 | BEGIN TRANSACTION
24 | --All logic goes in here
25 | --Any select statements from [Sometable] need to be suffixed with:
26 | --WHERE Year([SomeDateField])=@Year AND Month([SomeDateField])=@Month
27 | COMMIT TRANSACTION
28 |
29 | FETCH NEXT FROM BatchingCursor INTO @Year, @Month;
30 | END;
31 | CLOSE BatchingCursor;
32 | DEALLOCATE BatchingCursor;
33 | GO
34 |
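35 | --Example inner statement (hypothetical target table) showing the pattern:
36 | --INSERT INTO TargetTable
37 | --SELECT * FROM [Sometable]
38 | --WHERE YEAR([SomeDateField]) = @Year AND MONTH([SomeDateField]) = @Month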
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/04 Data Processing Sample Scripts/bulk insert.sql:
--------------------------------------------------------------------------------
1 | --Bulk load a flat file into your table.
2 | BULK INSERT YourSchema.YourTable
3 | FROM 'E:\YourFileName.csv'
4 | WITH (
5 | FIELDTERMINATOR = ',',
6 | ROWTERMINATOR = '0x0a',
7 | FIRSTROW = 2
8 | );
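9 |
10 | --A sketch of the same load with optional hardening: TABLOCK allows a
11 | --minimally logged load, and ERRORFILE (hypothetical path) captures rejected rows.
12 | --BULK INSERT YourSchema.YourTable
13 | --FROM 'E:\YourFileName.csv'
14 | --WITH (FIELDTERMINATOR = ',', ROWTERMINATOR = '0x0a', FIRSTROW = 2,
15 | --TABLOCK, ERRORFILE = 'E:\YourFileName_rejects.err');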
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/04 Data Processing Sample Scripts/data cleansing.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author:
3 | -- Create date:
4 | -- Description:
5 | -- Usage:
6 | -- =============================================
7 |
8 | USE [ODS]
9 | GO
10 |
11 |
12 | DROP PROCEDURE IF EXISTS [dbo].[usp_CleanYourStageTableNameData]
13 | GO
14 |
15 |
16 | SET ANSI_NULLS ON
17 | GO
18 |
19 | SET QUOTED_IDENTIFIER ON
20 | GO
21 |
22 |
23 | CREATE PROCEDURE [dbo].[usp_CleanYourStageTableNameData]
24 | AS
25 | BEGIN
26 |
27 | BEGIN TRANSACTION
28 |
29 |
30 | --Convert dates into YYYYMMDD format
31 | UPDATE [YourSchemaName].[YourStageTableNameData]
32 | SET [YourDateColumn] = dbo.udf_CleanDate([YourDateColumn])
33 |
34 | --Remove CR/LF characters from a column
35 | UPDATE [YourSchemaName].[YourStageTableNameData]
36 | SET YourColumn = REPLACE(YourColumn, CHAR(13) + CHAR(10), '')
37 |
38 | UPDATE [YourSchemaName].[YourStageTableNameData]
39 | SET [YourJoinColumn] = ''
40 | WHERE [YourJoinColumn] IS NULL
41 |
42 |
43 | UPDATE [YourSchemaName].[YourStageTableNameData]
44 | SET Cleansed = 1
45 |
46 |
47 |
48 |
49 |
50 | COMMIT TRANSACTION
51 |
52 | END
53 |
54 |
55 |
56 | GO
57 |
58 |
59 |
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/04 Data Processing Sample Scripts/fact table load.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author:
3 | -- Create date:
4 | -- Description:
5 | -- =============================================
6 |
7 | USE ODS
8 |
9 | GO
10 |
11 | DROP PROCEDURE IF EXISTS [dbo].[usp_ProcessFactYourFactTable]
12 |
13 | GO
14 |
15 |
16 | SET ANSI_NULLS ON
17 | GO
18 |
19 | SET QUOTED_IDENTIFIER ON
20 | GO
21 |
22 | CREATE PROCEDURE usp_ProcessFactYourFactTable
23 | AS
24 | BEGIN
25 |
26 |
27 | IF OBJECT_ID('tempdb..#DimYourDimensionTable') IS NOT NULL DROP TABLE #DimYourDimensionTable
28 | IF OBJECT_ID('tempdb..#DimYourJunkDimension') IS NOT NULL DROP TABLE #DimYourJunkDimension
29 |
30 | CREATE TABLE #DimYourDimensionTable(DimCK BIGINT, SourceSystemKey NVARCHAR(50), ETLKey UNIQUEIDENTIFIER)
31 | CREATE TABLE #DimYourJunkDimension(DimCK BIGINT, RowHash BINARY(16), ETLKey UNIQUEIDENTIFIER)
32 |
33 |
34 | CREATE CLUSTERED INDEX CIDX_1 ON #DimYourDimensionTable(ETLKey)
35 | CREATE CLUSTERED INDEX CIDX_2 ON #DimYourJunkDimension(ETLKey)
36 |
37 |
38 | CREATE NONCLUSTERED INDEX NCIDX_1 ON #DimYourDimensionTable(SourceSystemKey)
39 | CREATE NONCLUSTERED INDEX NCIDX_2 ON #DimYourJunkDimension(RowHash)
40 |
41 |
42 | BEGIN TRANSACTION
43 | ----
44 |
45 | INSERT INTO #DimYourDimensionTable(SourceSystemKey, ETLKey)
46 | SELECT KeyColumn, ETLKey
47 | FROM StageTable
48 | WHERE Processed = 0
49 | AND [ErrorRecord] = 0
50 |
51 |
52 | UPDATE tt
53 | SET tt.DimCK = cmt.YourDimensionTableCK
54 | FROM #DimYourDimensionTable tt
55 | JOIN [cm].DimYourDimensionTable cmt
56 | ON tt.SourceSystemKey = cmt.SourceSystemKey
57 |
58 |
59 | ---
60 | INSERT INTO #DimYourJunkDimension(RowHash, ETLKey)
61 | SELECT HASHBYTES('MD5',ColumnName), ETLKey
62 | FROM StageTable
63 | WHERE Processed = 0
64 | AND [ErrorRecord] = 0
65 |
66 | UPDATE tt
67 | SET tt.DimCK = cmt.YourJunkDimensionCK
68 | FROM #DimYourJunkDimension tt
69 | JOIN cm.DimYourJunkDimension cmt
70 | ON tt.RowHash = cmt.RowHash
71 |
72 | ----
73 |
74 |
75 | TRUNCATE TABLE [cm].[FactYourFactTable]
76 |
77 |
78 |
79 | INSERT INTO [cm].[FactYourFactTable](
80 | [ETLKey],
81 | YourDimensionTableCK,
82 | YourJunkDimensionCK,
83 | --your columns
84 | [SourceSystem]
85 | )
86 | SELECT
87 | p.[ETLKey],
88 | a.DimCK,
89 | b.DimCK,
90 | --your columns
91 | p.SourceSystem
92 | FROM YourStageTable p
93 | JOIN #DimYourDimensionTable a
94 | ON a.ETLKey = p.ETLKey
95 | JOIN #DimYourJunkDimension b
96 | ON b.ETLKey = p.ETLKey
97 | WHERE p.Processed = 0
98 | AND p.[ErrorRecord] = 0
99 |
100 | --Update staging so you can match to production later.
101 | UPDATE st
102 | SET st.[UniqueDims] = cmt.[UniqueDims]
103 | FROM YourStageTable st
104 | JOIN [cm].[FactYourFactTable] cmt
105 | ON st.ETLKey = cmt.ETLKey
106 |
107 |
108 | MERGE [FactYourFactTable] AS target
109 | USING(
110 | SELECT
111 | YourDimensionTableCK,
112 | YourJunkDimensionCK,
113 | --your columns
114 | [SourceSystem],
115 | [UniqueDims]
116 | FROM [cm].[FactYourFactTable]
117 | ) AS source
118 | ON target.[UniqueDims] = source.[UniqueDims]
119 |
120 | WHEN NOT MATCHED THEN
121 |
122 | INSERT(
123 | YourDimensionTableCK,
124 | YourJunkDimensionCK,
125 | --your columns
126 | [SourceSystem],
127 | [CreatedOn],
128 | [CreatedBy]
129 | )
130 | VALUES(
131 | source.YourDimensionTableCK,
132 | source.YourJunkDimensionCK,
133 | --your columns
134 |
135 | source.[SourceSystem],
136 | CURRENT_TIMESTAMP,
137 | SYSTEM_USER
138 | )
139 |
140 | WHEN MATCHED
141 | THEN
142 | UPDATE
143 | SET
144 | YourColumns = source.YourColumns,
145 | [SourceSystem] = source.SourceSystem,
146 | [UpdatedBy] = SYSTEM_USER,
147 | [UpdatedOn] = CURRENT_TIMESTAMP
148 | ;
149 |
150 |
151 |
152 |
153 |
154 |
155 | COMMIT TRANSACTION
156 |
157 |
158 | DROP TABLE #DimYourDimensionTable
159 | DROP TABLE #DimYourJunkDimension
160 |
161 | END
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/04 Data Processing Sample Scripts/remove dups.sql:
--------------------------------------------------------------------------------
1 | --Full instructions can be found at: https://tutorials.massstreetuniversity.com/transact-sql/solutions/removing-dups.html
2 |
3 | --First we need to keep track of the dups.
4 | --If you have a large amount of dups, use a
5 | --temp table instead.
6 | DECLARE @DuplicateIDs TABLE(RowNumber INT, ID BIGINT, TextDescription NVARCHAR(50))
7 |
8 | INSERT INTO @DuplicateIDs(RowNumber, ID, TextDescription)
9 | SELECT ROW_NUMBER() OVER (PARTITION BY TextDescription ORDER BY TextDescription) AS RowNumber, ID, TextDescription
10 | FROM DimYourDimensionTable
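11 |
12 | --Then delete every occurrence after the first. A sketch completing the
13 | --pattern; assumes ID uniquely identifies rows in your dimension table.
14 | DELETE FROM DimYourDimensionTable
15 | WHERE ID IN (SELECT ID FROM @DuplicateIDs WHERE RowNumber > 1)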
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/04 Data Processing Sample Scripts/type I dimension processing.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author:
3 | -- Create date:
4 | -- Description:
5 | -- =============================================
6 |
7 | USE ODS
8 |
9 | GO
10 |
11 | DROP PROCEDURE IF EXISTS dbo.usp_ProcessDimYourDimensionName
12 | GO
13 |
14 |
15 | SET ANSI_NULLS ON
16 | GO
17 |
18 | SET QUOTED_IDENTIFIER ON
19 | GO
20 |
21 | CREATE PROCEDURE usp_ProcessDimYourDimensionName
22 | AS
23 | BEGIN
24 |
25 | DECLARE @LowDate AS DATETIME
26 | DECLARE @HighDate AS DATETIME
27 |
28 | SELECT @HighDate = CAST(MAX(DateCK) AS NCHAR(8))
29 | FROM DimDate
30 | WHERE DateCK NOT IN (00000000,11111111)
31 |
32 | SELECT @LowDate = CAST(MIN(DateCK) AS NCHAR(8))
33 | FROM DimDate
34 | WHERE DateCK NOT IN (00000000,11111111)
35 |
36 | TRUNCATE TABLE [cm].[DimYourDimensionName]
37 |
38 | BEGIN TRANSACTION
39 |
40 |
41 |
42 | INSERT INTO cm.DimYourDimensionName(
43 | --your columns here
44 | [SourceSystem],
45 | [SourceSystemKey]
46 | )
47 | SELECT DISTINCT
48 | --your columns here
49 | [SourceSystem],
50 | [SourceSystemKey]
51 | FROM YourStageTable
52 |
53 |
54 | MERGE DimYourDimensionName AS target
55 | USING (
56 | SELECT
57 | --your columns here
58 | [SourceSystem],
59 | [SourceSystemKey]
60 | FROM cm.DimYourDimensionName
61 | ) AS source
62 | ON (target.SourceSystemKey COLLATE DATABASE_DEFAULT = source.SourceSystemKey COLLATE DATABASE_DEFAULT)
63 |
64 | WHEN NOT MATCHED THEN
65 | INSERT (
66 | --your columns here
67 | [SourceSystem],
68 | [SourceSystemKey],
69 | EffectiveFrom,
70 | EffectiveTo,
71 | IsMostRecentRecord,
72 | CreatedBy,
73 | CreatedOn
74 | )
75 | VALUES (
76 | --your columns here
77 | [SourceSystem],
78 | [SourceSystemKey],
79 | @LowDate,
80 | @HighDate,
81 | 1,
82 | SYSTEM_USER,
83 | CURRENT_TIMESTAMP
84 | )
85 |
86 | WHEN MATCHED
87 |
88 | THEN
89 | UPDATE
90 | SET
91 | --your columns here
92 | target.[YourColumn] = source.[YourColumn],
93 | UpdatedBy = SYSTEM_USER,
94 | UpdatedOn = CURRENT_TIMESTAMP
95 | ;
96 |
97 | TRUNCATE TABLE cm.DimYourDimensionName
98 |
99 | INSERT INTO cm.DimYourDimensionName(
100 | YourDimensionNameCK,
101 | SourceSystemKey
102 | )
103 | SELECT
104 | YourDimensionNameCK,
105 | SourceSystemKey
106 | FROM DimYourDimensionName
107 |
108 |
109 |
110 |
111 | COMMIT TRANSACTION
112 |
113 | END
114 |
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/04 Data Processing Sample Scripts/type II dimension processing.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author:
3 | -- Create date:
4 | -- Description:
5 | -- Usage: https://tutorials.massstreetuniversity.com/transact-sql/solutions/load-typeII-dimension.html
6 | -- =============================================
7 |
8 | USE ODS
9 |
10 | GO
11 |
12 |
13 | DROP PROCEDURE IF EXISTS dbo.usp_ProcessDimYourDimensionName
14 | GO
15 |
16 |
17 | SET ANSI_NULLS ON
18 | GO
19 |
20 | SET QUOTED_IDENTIFIER ON
21 | GO
22 |
23 | CREATE PROCEDURE usp_ProcessDimYourDimensionName
24 | AS
25 | BEGIN
26 |
27 |
28 | DECLARE @LowDate AS DATETIME
29 | DECLARE @HighDate AS DATETIME
30 |
31 | SELECT @HighDate = CAST(MAX(DateCK) AS NCHAR(8))
32 | FROM DimDate
33 | WHERE DateCK NOT IN (00000000,11111111)
34 |
35 | SELECT @LowDate = CAST(MIN(DateCK) AS NCHAR(8))
36 | FROM DimDate
37 | WHERE DateCK NOT IN (00000000,11111111)
38 |
39 |
40 |
41 | IF OBJECT_ID('tempdb..#DimYourDimensionName') IS NOT NULL DROP TABLE #DimYourDimensionName
42 |
43 | CREATE TABLE #DimYourDimensionName(
44 | [YourDimensionNameCK] [bigint] NULL,
45 | --your columns here
46 | [CreatedBy] [nvarchar](50) NULL,
47 | [CreatedOn] [datetime] NULL,
48 | [UpdatedBy] [nvarchar](50) NULL,
49 | [UpdatedOn] [datetime] NULL,
50 | [SourceSystem] [nvarchar](100) NULL,
51 | [SourceSystemKey] [nvarchar](100) NULL,
52 | [EffectiveFrom] [datetime] NULL,
53 | [EffectiveTo] [datetime] NULL,
54 | [IsMostRecentRecord] [bit] NULL
55 | )
56 |
57 |
58 | TRUNCATE TABLE cm.DimYourDimensionName
59 |
60 | BEGIN TRANSACTION
61 |
62 | --Handle new records
63 | INSERT INTO cm.DimYourDimensionName(
64 | --your columns here
65 | [SourceSystem],
66 | [SourceSystemKey]
67 | )
68 | SELECT
69 | DISTINCT
70 | --your columns here, e.g.:
71 | --[Name],
72 | --[Code],
73 | --[TimeZone],
74 | --[Suffix],
75 | --[Currency],
76 | --[Country],
77 | [SourceSystem],
78 | [SourceSystemKey]
79 | FROM [YourSchemaName].[YourStageTable]
80 | WHERE Processed = 0
81 |
82 | MERGE DimYourDimensionName AS target
83 | USING (
84 | SELECT
85 | --your columns here
86 | [SourceSystem],
87 | [SourceSystemKey]
88 | --,RowHash
89 | FROM cm.DimYourDimensionName
90 | ) AS source
91 | ON target.[SourceSystemKey] = source.[SourceSystemKey]
92 |
93 | WHEN NOT MATCHED THEN
94 | INSERT (
95 | --your columns here
96 | [SourceSystem],
97 | [SourceSystemKey],
98 | EffectiveFrom,
99 | EffectiveTo,
100 | IsMostRecentRecord,
101 | CreatedBy,
102 | CreatedOn
103 | )
104 | VALUES (
105 | --your columns here
106 | [SourceSystem],
107 | [SourceSystemKey],
108 | @LowDate,
109 | @HighDate,
110 | 1,
111 | SYSTEM_USER,
112 | CURRENT_TIMESTAMP
113 | )
114 | ;
115 |
116 | --Handle changed records
117 | INSERT INTO #DimYourDimensionName(
118 | --your columns here
119 | [SourceSystem],
120 | [SourceSystemKey],
121 | EffectiveFrom,
122 | EffectiveTo,
123 | IsMostRecentRecord,
124 | CreatedBy,
125 | CreatedOn
126 | )
127 | SELECT
128 | --your columns here
129 | [SourceSystem],
130 | [SourceSystemKey],
131 | EffectiveFrom,
132 | EffectiveTo,
133 | IsMostRecentRecord,
134 | CreatedBy,
135 | CreatedOn
136 | FROM(
137 | MERGE DimYourDimensionName AS target
138 | USING (
139 | SELECT
140 | --your columns here
141 | [SourceSystem],
142 | [SourceSystemKey]
143 | --,RowHash
144 | FROM cm.DimYourDimensionName
145 | ) AS source
146 | ON target.[SourceSystemKey] = source.[SourceSystemKey]
147 | WHEN MATCHED
148 | --AND source.RowHash <> target.RowHash
149 | AND target.IsMostRecentRecord = 1
150 | THEN
151 | UPDATE
152 | SET
153 | [UpdatedBy] = SYSTEM_USER,
154 | [UpdatedOn] = CURRENT_TIMESTAMP,
155 | EffectiveTo = DATEADD(ss,-1,CURRENT_TIMESTAMP),
156 | IsMostRecentRecord = 0
157 | OUTPUT
158 | $action Action_Out,
159 | --your columns here
160 | source.[SourceSystem],
161 | source.[SourceSystemKey],
162 | CURRENT_TIMESTAMP AS EffectiveFrom,
163 | @HighDate AS EffectiveTo,
164 | 1 AS IsMostRecentRecord,
165 | SYSTEM_USER AS CreatedBy,
166 | CURRENT_TIMESTAMP AS CreatedOn
167 | ) AS MERGE_OUT
168 | WHERE MERGE_OUT.Action_Out = 'UPDATE'
169 | ;
170 |
171 | INSERT INTO DimYourDimensionName(
172 | --your columns here
173 | [SourceSystem],
174 | [SourceSystemKey],
175 | EffectiveFrom,
176 | EffectiveTo,
177 | IsMostRecentRecord,
178 | CreatedBy,
179 | CreatedOn
180 | )
181 | SELECT
182 | --your columns here
183 | [SourceSystem],
184 | [SourceSystemKey],
185 | EffectiveFrom,
186 | EffectiveTo,
187 | IsMostRecentRecord,
188 | CreatedBy,
189 | CreatedOn
190 | FROM #DimYourDimensionName
191 |
192 |
193 | TRUNCATE TABLE cm.DimYourDimensionName
194 |
195 | INSERT INTO cm.DimYourDimensionName(
196 | [YourDimensionNameCK],
197 | [SourceSystemKey]
198 | )
199 | SELECT
200 | [YourDimensionNameCK],
201 | [SourceSystemKey]
202 | FROM DimYourDimensionName
203 | WHERE IsMostRecentRecord = 1
204 |
205 | COMMIT TRANSACTION
206 |
207 | DROP TABLE #DimYourDimensionName
208 |
209 | END
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/05 Helper Scripts/add rowhash.sql:
--------------------------------------------------------------------------------
1 | --Adding a RowHash on a TypeII dimension
2 |
3 |
4 | BEGIN TRANSACTION
5 |
6 | DROP INDEX IF EXISTS [Index Name] ON YourSchema.DimYourDimension
7 |
8 | ALTER TABLE YourSchema.DimYourDimension DROP COLUMN IF EXISTS RowHash
9 |
10 | ALTER TABLE YourSchema.DimYourDimension ADD [RowHash] AS (CONVERT([varbinary](16),hashbytes('MD5',concat(
11 | CONVERT([nvarchar](35),Column1,0),
12 | CONVERT([nvarchar](35),Column2,0),
13 | CONVERT([nvarchar](35),Column3,0),
14 | CONVERT([nvarchar](35),Column4,0),
15 | CONVERT([nvarchar](35),Column5,0),
16 | CONVERT([nvarchar](35),Column6,0)
17 | )),0)) PERSISTED
18 |
19 | CREATE NONCLUSTERED INDEX [Index Name] ON YourSchema.DimYourDimension(RowHash)
20 |
21 | COMMIT TRANSACTION
22 |
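23 | --A sketch of how the RowHash is consumed once it exists: the commented-out
24 | --RowHash comparison in the type II dimension processing script can be enabled
25 | --so that only genuinely changed rows are expired. Column names remain
26 | --placeholders, as above.
27 | --
28 | --WHEN MATCHED
29 | --AND source.RowHash <> target.RowHash
30 | --AND target.IsMostRecentRecord = 1
31 | --THEN ...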
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/05 Helper Scripts/change collation.sql:
--------------------------------------------------------------------------------
1 | USE ODS
2 |
3 | BEGIN TRANSACTION
4 |
5 | USE EDW
6 |
7 | DROP INDEX IF EXISTS [NCINDEX_YourDimensionName_ROWHASH] ON [dw].[DimYourDimensionName]
8 |
9 | ALTER TABLE [dw].[DimYourDimensionName] DROP COLUMN RowHash
10 |
11 | ALTER TABLE dw.[DimYourDimensionName] ALTER COLUMN [YourDimensionName] [nvarchar](MAX) COLLATE SQL_Latin1_General_CP1_CS_AS
12 |
13 | ALTER TABLE dw.[DimYourDimensionName] ADD RowHash AS (CONVERT([varbinary](16),hashbytes('MD5',concat(
14 | CONVERT([nvarchar](35),Column1,(0)),
15 | CONVERT([nvarchar](35),Column2,(0)),
16 | CONVERT([nvarchar](35),Column3,(0)),
17 | CONVERT([nvarchar](35),Column4,(0))
18 | )))) PERSISTED
19 |
20 | CREATE NONCLUSTERED INDEX [NCINDEX_YourDimensionName_ROWHASH] ON [dw].[DimYourDimensionName](RowHash)
21 |
22 | USE ODS
23 |
24 | DROP INDEX IF EXISTS [NCINDEX_YourDimensionName_ROWHASH] ON cm.[DimYourDimensionName]
25 |
26 | ALTER TABLE cm.[DimYourDimensionName] DROP COLUMN RowHash
27 |
28 | ALTER TABLE cm.[DimYourDimensionName] ALTER COLUMN [YourDimensionName] [nvarchar](MAX) COLLATE SQL_Latin1_General_CP1_CS_AS
29 |
30 | ALTER TABLE cm.[DimYourDimensionName] ADD RowHash AS (CONVERT([varbinary](16),hashbytes('MD5',concat(
31 | CONVERT([nvarchar](35),Column1,(0)),
32 | CONVERT([nvarchar](35),Column2,(0)),
33 | CONVERT([nvarchar](35),Column3,(0)),
34 | CONVERT([nvarchar](35),Column4,(0))
35 | )))) PERSISTED
36 |
37 |
38 | CREATE NONCLUSTERED INDEX [NCINDEX_YourDimensionName_ROWHASH] ON cm.[DimYourDimensionName](RowHash)
39 |
40 | ALTER TABLE [YourSchema].[YourStageTable] ALTER COLUMN [YourColumnName] [nvarchar](MAX) COLLATE SQL_Latin1_General_CP1_CS_AS --also realign the staging column that feeds the dimension
41 |
42 |
43 | USE EDW
44 |
45 | COMMIT TRANSACTION
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/05 Helper Scripts/configuration insert sample script.sql:
--------------------------------------------------------------------------------
1 | /*
2 | Create an insert statement for every variable that you create as a configuration value.
3 | The PackageName is the name of the package MINUS the package extension.
4 | PackageName functions as a filter that you select your variables on.
5 | */
6 |
7 | USE SSISManagement
8 |
9 | --Substitute the name of your package.
10 |
11 | DELETE FROM SSISConfigurations
12 | WHERE PackageName = 'Package'
13 |
14 | INSERT INTO SSISConfigurations(PackageName, VariableName, VariableValue)
15 | SELECT 'Package', 'strVariable1', 'NewValue'
16 | UNION ALL
17 | SELECT 'Package', 'intVariable2', '1000'
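18 |
19 | --Verify the values for a given package (a sketch using only the table above;
20 | --'Package' is the same placeholder name).
21 | SELECT PackageName, VariableName, VariableValue
22 | FROM SSISConfigurations
23 | WHERE PackageName = 'Package'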
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/05 Helper Scripts/documentation block.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 7Dec20
4 | -- Description:
5 | -- Change Log:
6 | -- 15Oct20 Created doc block sample. - BW
7 | -- =============================================
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/05 Helper Scripts/populate fact table with fake data.sql:
--------------------------------------------------------------------------------
1 | -- =============================================
2 | -- Author: Bob Wakefield
3 | -- Create date: 7Dec20
4 | -- Description: This script will populate a fact table with
5 | -- a random amount of records so you can test to see that
6 | -- usp_RecordRowCounts works properly.
7 | -- Change Log:
8 | -- =============================================
9 |
10 | --set the range for where you want the final number of records to fall.
11 | DECLARE @random INT;
12 | DECLARE @upper INT;
13 | DECLARE @lower INT;
14 | SET @lower = 1 ---- The lowest random number
15 | SET @upper = 20 ---- The highest random number
16 | SELECT @random = ROUND(((@upper - @lower -1) * RAND() + @lower), 0)
17 |
18 | --This is an example using adventure works. Create an insert statement for whatever fact table you would like to load.
19 | DECLARE @counter smallint;
20 | SET @counter = 1;
21 | WHILE @counter < @random
22 | BEGIN
23 | INSERT INTO [AdventureWorksDW2012].[dbo].[FactFinance]([DateKey], [OrganizationKey], [DepartmentGroupKey], [ScenarioKey], [AccountKey], [Amount], [Date])
24 | SELECT 20050701, 5, 7, 1, 21, 4358, CURRENT_TIMESTAMP
25 | SET @counter = @counter + 1
26 | END;
27 | GO
28 |
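29 | --After loading, a quick sanity check (a sketch; assumes the usp_RecordRowCounts
30 | --proc from the Finalize And Audit scripts is deployed):
31 | --EXEC usp_RecordRowCounts
32 | --SELECT COUNT(*) FROM [AdventureWorksDW2012].[dbo].[FactFinance]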
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/05 Helper Scripts/proc execution scripts.sql:
--------------------------------------------------------------------------------
1 | USE ODS
2 |
3 | --Your Model
4 | --Start Over
5 | TRUNCATE TABLE YourFactTable
6 | DELETE FROM DimYourDimTable
7 |
8 | --Pull Data
9 | EXEC usp_YourPullProcess
10 |
11 | --Clean Data
12 | EXEC usp_YourCleanProcess
13 |
14 | --Process Dims
15 | EXEC usp_ProcessDimYourDimensionName
16 |
17 | --Process Facts
18 | EXEC usp_ProcessYourFactTable
19 |
20 | --Finalize and Audit
21 | EXEC usp_RecordRowCounts
22 | EXEC usp_MarkRecordsAsProcessed
23 | EXEC usp_CheckForUnprocessedRecords
24 |
25 | --Monitoring
26 | EXEC usp_TableLoadMonitoring
27 | EXEC usp_LoadTableLoadReportingTable
28 |
29 |
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/05 Helper Scripts/show all columns in database.sql:
--------------------------------------------------------------------------------
1 | USE YourEDW
2 | GO
3 |
4 | SELECT
5 | SCHEMA_NAME(t.schema_id) AS [Schema],
6 | t.name AS [Table Name],
7 | c.name AS [Column Name],
8 | CASE
9 | WHEN ty.name IN ('char','nchar','varchar','nvarchar') THEN CONCAT(ty.name,'(',IIF(c.max_length = -1,'MAX',CAST(IIF(ty.name LIKE 'n%',c.max_length/2,c.max_length) AS varchar(6))),')')
10 | WHEN ty.name IN ('numeric','decimal') THEN CONCAT(ty.name,'(',c.precision,',',c.scale,')')
11 | ELSE ty.name
12 | END AS [Data Type]
13 | FROM sys.tables AS t
14 | JOIN sys.columns c
15 | ON t.OBJECT_ID = c.OBJECT_ID
16 | JOIN sys.types ty
17 | ON c.user_type_id = ty.user_type_id
18 | --LEFT OUTER JOIN sys.index_columns ic --unused by the SELECT and can duplicate rows; uncomment only if needed
19 | --ON ic.object_id = c.object_id
20 | --AND ic.column_id = c.column_id
21 | --LEFT OUTER JOIN sys.indexes i
22 | --ON ic.object_id = i.object_id
23 | --AND ic.index_id = i.index_id
24 | WHERE 1 = 1
25 | --AND c.name LIKE '%YourColumnName%'
26 | ORDER BY [Table Name], [Column Name]
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/05 Helper Scripts/troubleshooting.sql:
--------------------------------------------------------------------------------
1 | --View information about current users, sessions, and processes
2 | EXEC sp_who2
3 |
4 | --see how long it will take for
5 | --a rollback to complete
6 | KILL [SPID] WITH STATUSONLY
7 | --Example
8 | KILL 59 WITH STATUSONLY
9 |
10 | --Find the oldest open transaction, then kill its SPID
11 | DBCC opentran
12 | KILL [SPID]
13 | --Example
14 | KILL 59
15 |
16 |
--------------------------------------------------------------------------------
/Sample Scripts/01 SQL/05 Helper Scripts/utils.sql:
--------------------------------------------------------------------------------
1 | --Create Schema
2 | CREATE SCHEMA SchemaName
3 |
4 | GO
5 |
6 | --Change collation to something case sensitive
7 | USE master;
8 |
9 | ALTER DATABASE [your database name] SET SINGLE_USER WITH ROLLBACK IMMEDIATE
10 |
11 | ALTER DATABASE [your database name] COLLATE Latin1_General_CS_AS ;
12 |
13 |
14 | ALTER DATABASE [your database name] SET MULTI_USER WITH ROLLBACK IMMEDIATE
15 |
16 |
17 | --Verify the collation setting.
18 | SELECT name, collation_name
19 | FROM sys.databases
20 | WHERE name = N'[your database name]';
21 | GO
22 |
23 | --Alter collation at the column level
24 | ALTER TABLE [table name] ALTER COLUMN [column name] [your data type] COLLATE Latin1_General_CS_AS
25 |
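26 | --Verify the new collation at the column level (a sketch; substitute your table name)
27 | SELECT name, collation_name
28 | FROM sys.columns
29 | WHERE object_id = OBJECT_ID('[table name]')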
--------------------------------------------------------------------------------
/Sample Scripts/02 Python/Building Blocks/CombineCSVsIntoOneFile.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | import glob
3 | import os
4 |
5 | root_directory = ''  #populated from configuration
6 | app_directory = ''  #populated from configuration
7 | source_directory = ''  #as specified
8 | target_directory = ''  #as specified
9 | output_file_name = 'YourFileName.csv'
10 |
11 | source_path = os.path.join(root_directory, app_directory, source_directory)
12 | target_path = os.path.join(root_directory, app_directory, target_directory, output_file_name)
13 |
14 | #change file extension if necessary
15 | source_files = glob.glob(os.path.join(source_path, '*.csv'))
16 |
17 | with open(target_path, 'wb') as outfile:
18 | for i, fname in enumerate(source_files):
19 | with open(fname, 'rb') as infile:
20 | if i != 0:
21 | infile.readline() # Throw away header on all but first file
22 |
23 | shutil.copyfileobj(infile, outfile) # Block copy rest of file from input to output without parsing
24 |
25 | # clean input directory
26 | for i, fname in enumerate(source_files):
27 | os.remove(fname)
--------------------------------------------------------------------------------
/Sample Scripts/02 Python/Building Blocks/ConvertCommaToPipeDelimitedFile.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 |
4 | url = 'https://query.data.world/s/paxejv4t6pn3el4ttskmx7rhxhz5ho'
5 | script_dir = os.getcwd()
6 | data_directory = 'data\\'
7 | example_directory = 'PipeDelimitedExample\\'
8 | file_name = 'flights.csv'
9 | file_path = os.path.join(script_dir,data_directory,example_directory,file_name)
10 |
11 | # Read csv data into pandas and write | delimited txt file
12 | df = pd.read_csv(url,index_col=False, dtype=str)
13 | df.to_csv(file_path, sep="|",index=False)
--------------------------------------------------------------------------------
/Sample Scripts/02 Python/Building Blocks/ConvertExcelToCSV.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 |
4 |
5 | sourcePath = ''  #as specified
6 | destinationPath = ''  #as specified
7 |
8 | for path, directories, files in os.walk(sourcePath):
9 | for fn in files:
10 | print("Reading Excel file: {}".format(fn))
11 |
12 | df = pd.read_excel(os.path.join(path, fn), sheet_name="DATASET")  #use the walked path so files in subdirectories resolve
13 |
14 | FullPath = destinationPath + "/" + os.path.splitext(fn)[0] + ".csv"
15 |
16 | print("Writing txt file to: {}".format(FullPath))
17 |
18 | df.to_csv(FullPath, sep="|", index=False)
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/Sample Scripts/02 Python/Building Blocks/LoopingOverFilesInADirectory.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | #Loop Over Everything In Folder
4 | script_dir = os.getcwd()
5 | data_directory = 'data\\'
6 | example_directory = 'FileLoopExample\\'
7 | path = os.path.join(script_dir,data_directory,example_directory)
8 |
9 | for filename in os.listdir(path):
10 | print(filename)
11 |
12 |
13 | #Loop Over Files With A Specific File Extension
14 | script_dir = os.getcwd()
15 | data_directory = 'data\\'
16 | example_directory = 'FileLoopExample\\'
17 | path = os.path.join(script_dir,data_directory,example_directory)
18 |
19 | for filename in os.listdir(path):
20 | if filename.endswith('.csv'):
21 | print(filename)
22 |
23 | #Loop Over Files In Subdirectories Recursively
24 | import os
25 |
26 | script_dir = os.getcwd()
27 | data_directory = 'data\\'
28 | example_directory = 'FileLoopExample\\'
29 | path = os.path.join(script_dir,data_directory,example_directory)
30 |
31 | for subdir, dirs, files in os.walk(path):
32 | for filename in files:
33 | print(filename)
34 |
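35 | #Loop Over Files With A Specific Extension, Recursively
36 | #(a sketch combining the two patterns above using glob)
37 | import glob
38 |
39 | for filepath in glob.glob(os.path.join(path, '**', '*.csv'), recursive=True):
40 | print(filepath)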
--------------------------------------------------------------------------------
/Sample Scripts/02 Python/Building Blocks/ProcessZipFile.py:
--------------------------------------------------------------------------------
1 | import urllib.request
2 | import os
3 | from pyunpack import Archive
4 |
5 | url = 'https://query.data.world/s/vb53nuuux6umwmccbwlajvlzttmz3q'
6 | script_dir = os.getcwd()
7 | file_name = 'Eurostat.zip'
8 | data_directory = 'data\\'
9 | example_directory = 'ZipFileExample\\'
10 | abs_file_path = os.path.join(script_dir, data_directory, example_directory, file_name)
11 | abs_directory_path = os.path.join(script_dir, data_directory, example_directory)
12 |
13 | with urllib.request.urlopen(url) as dl_file:
14 | with open(abs_file_path, 'wb') as out_file:
15 | out_file.write(dl_file.read())
16 |
17 | Archive(abs_file_path).extractall(abs_directory_path)
18 | os.remove(abs_file_path)
--------------------------------------------------------------------------------
/Sample Scripts/02 Python/Building Blocks/QueryDatabaseAndWriteLargeFile.py:
--------------------------------------------------------------------------------
1 | import pyodbc as db
2 | import os
3 | import time
4 | import csv
5 |
6 | script_dir = os.getcwd()
7 | data_directory = 'data\\'
8 | example_directory = 'WriteLargeFilesExample\\'
9 | file_name = 'EurostatDataOut.csv'
10 | target_path = os.path.join(script_dir, data_directory, example_directory, file_name)
11 |
12 | sql = 'SELECT * FROM EurostatData'
13 |
14 | # Set up the connection.
15 | print('Connecting to SQL Server database' + time.strftime(' %H:%M:%S'))
16 | connection_string = 'DSN=ETL;'
17 | conn = db.connect(connection_string)
18 | print('Preparing database for update' + time.strftime(' %H:%M:%S'))
19 | csr = conn.cursor()
20 | csr.execute(sql)
21 |
22 | with open(target_path, 'w', newline='') as f:
23 | writer = csv.writer(f, delimiter='|', quoting=csv.QUOTE_NONE)
24 | writer.writerow([x[0] for x in csr.description])
25 | for row in csr:
26 | writer.writerow(row)
27 |
28 | print('Processing file {} complete.'.format(file_name) + time.strftime(' %H:%M:%S'))
29 | conn.commit()
30 | csr.close()
31 | conn.close()
--------------------------------------------------------------------------------
/Sample Scripts/02 Python/Building Blocks/QueryDatabaseAndWriteSmallFile.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pandas as pd
3 | import pyodbc as db
4 |
5 | #Constants used to process data files
6 | #Existing paths are examples only!
7 |
8 |
9 | kFileDestination = 'C:/InterfaceAndExtractFiles/../Out/data.csv' #full path to processing Out folder
10 |
11 | #small amounts of data
12 | print("Starting: Processing Data")
13 |
14 | #alter the below for your file.
15 | try:
16 | print("Connecting to SQL Server database")
17 | connection_string = 'DSN=ETL;'
18 | conn = db.connect(connection_string)
19 | csr = conn.cursor()
20 | sql = "SELECT * FROM [table or view]"
21 | df = pd.read_sql(sql,conn)
22 | conn.commit()
23 | csr.close()
24 | conn.close()
25 | except Exception as e:
26 | print(e)
27 |
28 | #Write File
29 | df.to_csv(kFileDestination, sep="|",index=False)
30 |
31 |
32 | # Script Complete
33 | print("Complete: Processing Data")
34 |
--------------------------------------------------------------------------------
/Sample Scripts/02 Python/Building Blocks/SendEmail.py:
--------------------------------------------------------------------------------
1 | import smtplib
2 | from email.message import EmailMessage
3 | import yaml
4 |
5 | with open('global_config/config.yaml') as settings:
6 | cfg = yaml.safe_load(settings)
7 |
8 |
9 | from_address = (cfg['from_address'])
10 | to_address = (cfg['to_address'])
11 | password = (cfg['password'])
12 | smtp_server = (cfg['smtp_server'])
13 | smtp_port = (cfg['smtp_port'])
14 |
15 |
16 | def send_exception_email(exchange_directory):
17 | msg = EmailMessage()
18 | msg['From'] = from_address
19 | msg['To'] = to_address
20 | msg['Subject'] = 'Empty Directory In EOD Data'
21 | msg.set_content('There are no files in ' + exchange_directory)
22 |
23 | try:
24 | server = smtplib.SMTP_SSL(smtp_server, smtp_port)
25 | server.login(from_address, password)
26 | server.send_message(msg)
27 | server.quit()
28 | except TimeoutError as e:
29 | print(str(e))
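30 |
31 | #Example call (the directory path is illustrative only):
32 | #send_exception_email('path/to/empty/exchange/directory')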
--------------------------------------------------------------------------------
/Sample Scripts/02 Python/Building Blocks/StringMatching.py:
--------------------------------------------------------------------------------
1 | from fuzzywuzzy import fuzz
2 | from fuzzywuzzy import process
3 | import sys
4 | import pandas as pd
5 | import pyodbc as db
6 |
7 | try:
8 | print("Connecting to SQL Server database")
9 | connection_string = 'DSN=ETL;'
10 | conn = db.connect(connection_string)
11 | csr = conn.cursor()
12 | sql = "SELECT TOP 100 CONCAT([FirstName], '|', [MiddleName], '|', [LastName]) AS String FROM DimCustomer"
13 | sql2 = "SELECT TOP 100 CONCAT([FirstName], '|', [MiddleName], '|', [LastName]) AS String FROM DimCustomer ORDER BY NEWID()"
14 | df1 = pd.read_sql(sql,conn)
15 | df2 = pd.read_sql(sql2,conn)
16 | conn.commit()
17 | csr.close()
18 | conn.close()
19 | except:
20 | sys.exit("ERROR: Unable to query table or write file")
21 |
22 | #print(df1['String'])
23 |
24 | def fuzzywuzzy_match(x, choices, scorer, cutoff):
25 | return process.extractOne(x, choices=choices, scorer=scorer, score_cutoff=cutoff)
26 |
27 | FuzzyWuzzyResults = df1['String'].apply(fuzzywuzzy_match, args=(df2['String'], fuzz.token_set_ratio, 90))
28 |
29 | print(FuzzyWuzzyResults)
--------------------------------------------------------------------------------
/Sample Scripts/02 Python/Building Blocks/YAMLConfigImport.py:
--------------------------------------------------------------------------------
1 | import yaml
2 |
3 | with open('config.yaml') as settings:
4 | cfg = yaml.safe_load(settings)
5 |
6 | variable_name = cfg['config_name']
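7 |
8 | #A minimal sketch of the config.yaml this snippet expects;
9 | #'config_name' is the same placeholder key used above:
10 | #
11 | #config_name: your_value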
--------------------------------------------------------------------------------
/Sample Scripts/02 Python/Full Solutions/DownloadMoveAndStoreDataCSV.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import urllib
4 | import datetime
5 | import pandas as pd
6 | import pyodbc as db
7 | import zipfile as zf
8 |
9 | #The following four variables should be
10 | #populated by configuration
11 | FileExchangeRootDirectory = ''
12 | WarehouseProcessingRootDirectory = ''
13 | BusinessDirectory = ''
14 | ProcessDirectory = ''
15 |
16 | DataDirectory = 'In\\'
17 | ArchiveDirectory = 'Archive\\'
18 | FileName = ''
19 | fileTimeStampedTXT = ''
20 |
21 | FileURL = 'https://url.com/data.csv' #URL of File
22 | SiteURL = 'https://url.com' #URL of Site
23 |
24 |
25 | FullSourcePath = os.path.join(FileExchangeRootDirectory, BusinessDirectory, ProcessDirectory, DataDirectory, FileName)
26 | DestinationDirectory = os.path.join(WarehouseProcessingRootDirectory, BusinessDirectory, ProcessDirectory, DataDirectory)
27 | ArchiveDestinationDirectory = os.path.join(WarehouseProcessingRootDirectory, BusinessDirectory, ProcessDirectory, ArchiveDirectory)
28 |
29 |
30 | print("Starting: Processing Data")
31 |
32 | # verify that the site is available and the internet connection is working
33 | try:
34 | print("Validating status of site and internet connection")
35 | urllib.request.urlopen(SiteURL)
36 | except Exception as e:
37 | print(e)
38 |
39 | # download the file
40 | try:
41 | print("Downloading file to:", FullSourcePath)
42 | urllib.request.urlretrieve(FileURL, FullSourcePath)
43 | except Exception as e:
44 | print(e)
45 |
46 |
47 | # Read csv data into pandas and write | delimited txt file
48 | try:
49 | print("Reading csv file: {}".format(FullSourcePath))
50 | df = pd.read_csv(FullSourcePath,index_col=False,dtype=str)
51 | downloadTimeStamp = datetime.datetime.today().strftime('%Y%m')
52 | print("Setting download timestamp")
53 | df['AsOfMonth'] = downloadTimeStamp
54 | fileTimeStampedTXT = DestinationDirectory + FileName + downloadTimeStamp + "_data.txt"
55 | print("Writing txt file to: {}".format(fileTimeStampedTXT))
56 | df.to_csv(fileTimeStampedTXT, sep="|",index=False)
57 | except Exception as e:
58 | print(e)
59 |
60 | # delete csv file
61 | try:
62 | print("Deleting csv file: {}".format(FullSourcePath))
63 | if os.path.isfile(FullSourcePath):
64 | os.remove(FullSourcePath)
65 | except Exception as e:
66 | print(e)
67 |
68 | #alter the below for your file.
69 | #index table only if necessary
70 | #bulk load txt file to SQL Server
71 | try:
72 | print("Connecting to SQL Server database")
73 | connection_string = 'DSN=ETL;'
74 | conn = db.connect(connection_string)
75 | print("Preparing database for update")
76 | csr = conn.cursor()
77 | csr.execute("DELETE FROM [stage table] WHERE Processed = 1")
78 | csr.execute("DROP INDEX IF EXISTS [index name] ON [stage table]")
79 | sql = "BULK INSERT [stage table view] FROM '" + fileTimeStampedTXT + "' WITH (FIELDTERMINATOR = '|', ROWTERMINATOR = '0x0a', FIRSTROW = 2)"
80 | print("Update database with {} file data.".format(fileTimeStampedTXT))
81 | csr.execute(sql)
82 | print("Creating index on SQL table")
83 | csr.execute("CREATE NONCLUSTERED INDEX [index name] ON [stage table]([column name])")
84 | print("Completing SQL Server update")
85 | conn.commit()
86 | csr.close()
87 | conn.close()
88 | except Exception as e:
89 | print(e)
90 |
91 | # zip txt file to archive
92 | try:
93 | zipFile = ArchiveDestinationDirectory + FileName + downloadTimeStamp + "data.zip"
94 | print("Creating zip file for txt file archive")
95 | archive = zf.ZipFile(zipFile, "w")
96 | archive.write(fileTimeStampedTXT, os.path.basename(fileTimeStampedTXT))
97 | archive.close()
98 | except Exception as e:
99 | print(e)
100 |
101 | # delete txt file
102 | try:
103 | print("Deleting txt file: {}".format(fileTimeStampedTXT))
104 | if os.path.isfile(fileTimeStampedTXT):
105 | os.remove(fileTimeStampedTXT)
106 | except Exception as e:
107 | print(e)
108 |
109 | # Script Complete
110 | print("Complete: Processing Data")
111 |
--------------------------------------------------------------------------------
/Sample Scripts/02 Python/Full Solutions/LoadLargeCSVsIntoDataWarehouseStagingTables.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pyodbc as db
3 | import time
4 |
5 | #The following three variables should be
6 | #populated by configuration
7 | WarehouseProcessingRootDirectory = ''
8 | BusinessDirectory = ''
9 | ProcessDirectory = ''
10 |
11 | DataDirectory = 'In\\'
12 | FileName = ''
13 |
14 | FullSourcePath = os.path.join(WarehouseProcessingRootDirectory, BusinessDirectory, ProcessDirectory, DataDirectory, FileName)
15 |
16 | #Build SQL Statements
17 | drop_index_sql = 'ALTER TABLE [YourSchema].[YourStageTable] DROP CONSTRAINT [PK_YourStageTable] WITH ( ONLINE = OFF )'
18 |
19 | add_index_sql = 'ALTER TABLE [YourSchema].[YourStageTable] ADD CONSTRAINT [PK_YourStageTable] PRIMARY KEY CLUSTERED'
20 | add_index_sql = add_index_sql + '([ETLKey] ASC) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF,'
21 | add_index_sql = add_index_sql + 'SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON,'
22 | add_index_sql = add_index_sql + 'ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]'
23 |
24 | sql = "BULK INSERT YourStageTableView FROM '" + FullSourcePath + "' WITH (FIELDTERMINATOR = '|', ROWTERMINATOR = '0x0a', FIRSTROW = 2, TABLOCK, BATCHSIZE = 100000)"
25 |
26 | #Set up the connection.
27 | print('Connecting to SQL Server database' + time.strftime(' %H:%M:%S'))
28 | connection_string = 'DSN=ETL;'
29 | conn = db.connect(connection_string)
30 | print('Preparing database for update' + time.strftime(' %H:%M:%S'))
31 | csr = conn.cursor()
32 |
33 | #now let's load the file
34 | print('Begin processing {}.'.format(FileName) + time.strftime(' %H:%M:%S'))
35 | csr.execute('TRUNCATE TABLE [YourSchema].[YourStageTable]')
36 | csr.execute(drop_index_sql)
37 | print('Updating staging')
38 | csr.execute(sql)
39 | csr.execute(add_index_sql)
40 | print('Processing file {} complete.'.format(FileName) + time.strftime(' %H:%M:%S'))
41 | conn.commit()
42 | csr.close()
43 | conn.close()
44 |
45 | print('Complete: Processing Data' + time.strftime(' %H:%M:%S'))
--------------------------------------------------------------------------------
/Sample Scripts/02 Python/Full Solutions/MoveAndStoreDataExcel.py:
--------------------------------------------------------------------------------
1 | #See documentation for use of this script.
2 | import os
3 | import sys
4 | import datetime
5 | import pandas as pd
6 | import pyodbc as db
7 | import zipfile as zf
8 | #The following four variables should be
9 | #populated by configuration
10 | FileExchangeRootDirectory = ''
11 | WarehouseProcessingRootDirectory = ''
12 | BusinessDirectory = ''
13 | ProcessDirectory = ''
14 | DataDirectory = 'In\\'
15 | ArchiveDirectory = 'Archive\\'
16 | FileName = ''
17 | fileTimeStampedTXT = ''
18 | FullSourcePath = os.path.join(FileExchangeRootDirectory, BusinessDirectory, ProcessDirectory, DataDirectory, FileName)
19 | DestinationDirectory = os.path.join(WarehouseProcessingRootDirectory, BusinessDirectory, ProcessDirectory, DataDirectory)
20 | ArchiveDestinationDirectory = os.path.join(WarehouseProcessingRootDirectory, BusinessDirectory, ProcessDirectory, ArchiveDirectory)
21 | print("Starting: Processing data")
22 | # Check if required file exists
23 | if not os.path.isfile(FullSourcePath):
24 | sys.exit("ERROR: Unable to find file {}".format(FullSourcePath))
25 | # Read excel data into pandas and write | delimited txt file
26 | try:
27 | print("Reading Excel file: {}".format(FullSourcePath))
28 | df = pd.read_excel(FullSourcePath, sheet_name="DATASET")
29 | # set timestamp for file processing
30 | print("Setting process timestamp")
31 | processTimeStamp = datetime.datetime.today().strftime('%Y%m%d')
32 | # Create txt filename with timestamp
33 | fileTimeStampedTXT = DestinationDirectory + processTimeStamp + "_data.txt"
34 | print("Writing txt file to: {}".format(fileTimeStampedTXT))
35 | df.to_csv(fileTimeStampedTXT, sep="|", index=False)
36 | except Exception as e:
37 | print(e)
38 | # delete xlsx file
39 | try:
40 | print("Deleting xlsx file: {}".format(FullSourcePath))
41 | if os.path.isfile(FullSourcePath):
42 | os.remove(FullSourcePath)
43 | except Exception as e:
44 | print(e)
45 | # bulk load txt file to SQL Server
46 | try:
47 | print("Connecting to SQL Server database")
48 | connection_string = 'DSN=ETL;'
49 | conn = db.connect(connection_string)
50 | print("Preparing database for update")
51 | csr = conn.cursor()
52 | csr.execute("DELETE FROM [stage table] WHERE Processed = 1")
53 | print("Preparing bulk insert update")
54 | sql = "BULK INSERT [stage table view] FROM '" + fileTimeStampedTXT + "' WITH (FIELDTERMINATOR = '|', ROWTERMINATOR = '0x0a', FIRSTROW = 2)"
55 | print("Update database with {} file data.".format(fileTimeStampedTXT))
56 | csr.execute(sql)
57 | conn.commit()
58 | csr.close()
59 | conn.close()
60 | except Exception as e:
61 | print(e)
62 | # zip txt file to archive
63 | try:
64 | zipFile = ArchiveDestinationDirectory + processTimeStamp + "__data.zip"
65 | print("Creating zip file for txt file archive")
66 | archive = zf.ZipFile(zipFile, "w")
67 | archive.write(fileTimeStampedTXT, os.path.basename(fileTimeStampedTXT))
68 | archive.close()
69 | except Exception as e:
70 | print(e)
71 | # delete txt file
72 | try:
73 | print("Deleting txt file: {}".format(fileTimeStampedTXT))
74 | if os.path.isfile(fileTimeStampedTXT):
75 | os.remove(fileTimeStampedTXT)
76 | except Exception as e:
77 | print(e)
78 | # Script Complete
79 | print("Complete: Processing Data")
--------------------------------------------------------------------------------
/Sample Scripts/02 Python/Full Solutions/ReloadFromArchive.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import glob
4 | import pyodbc as db
5 | import zipfile as zf
6 |
7 | #The following three variables should be
8 | #populated by configuration
9 | WarehouseProcessingRootDirectory = ''
10 | BusinessDirectory = ''
11 | ProcessDirectory = ''
12 |
13 | DataDirectory = 'In\\'
14 | ArchiveDirectory = 'Archive\\'
15 | FileName = ''
16 | fileTimeStampedTXT = ''
17 |
18 | DestinationDirectory = os.path.join(WarehouseProcessingRootDirectory, BusinessDirectory, ProcessDirectory, DataDirectory)
19 | ArchiveDestinationDirectory = os.path.join(WarehouseProcessingRootDirectory, BusinessDirectory, ProcessDirectory, ArchiveDirectory)
20 |
21 | print("Starting: Processing ReloadAchive")
22 |
23 | # get list of zip file
24 | try:
25 | print("Getting zip files from archive directory {}".format(ArchiveDestinationDirectory))
26 | zipFiles = glob.glob(ArchiveDestinationDirectory + "*.zip", recursive=False)
27 | except Exception as e:
28 | print(e)
29 |
30 |
31 | # Report number of zip files found
32 | print("{} zip files found".format(len(zipFiles)))
33 |
34 | # unzip each file in zipFiles
35 | for zFile in zipFiles:
36 | try:
37 | print("Unzipping file {}".format(zFile))
38 | zip_ref = zf.ZipFile(zFile, 'r')
39 | zip_ref.extractall(DestinationDirectory)
40 | zip_ref.close()
41 | except Exception as e:
42 | print(e)
43 |
44 | # get list of txt files in C:/InterfaceAndExtractFiles/../In/
45 | try:
46 | print("Getting txt files from directory {}".format(DestinationDirectory))
47 | txtFiles = glob.glob(DestinationDirectory + "*.txt", recursive=False)
48 | except Exception as e:
49 | print(e)
50 |
51 | # Report number of zip files found
52 | print("{} txt files found".format(len(txtFiles)))
53 |
54 | # Create database connection
55 | try:
56 | print("Connecting to SQL Server database")
57 | connection_string = 'DSN=ETL;'
58 | conn = db.connect(connection_string)
59 | except Exception as e:
60 | print(e)
61 |
62 | # preparing SQL Server
63 | try:
64 | print("Preparing database for update")
65 | csr = conn.cursor()
66 | csr.execute("DELETE FROM [stage table] WHERE Processed = 1")
67 | csr.execute("DROP INDEX IF EXISTS [index name] ON [stage table]")
68 | conn.commit()
69 | csr.close()
70 | except Exception as e:
71 | print(e)
72 |
73 | # creating counters
74 | txtFileCount = len(txtFiles)
75 | n = 1
76 |
77 | # process each txt file
78 | for tFile in txtFiles:
79 | try:
80 | print("Processng file {} of {}: Update database with {} file data.".format(n, txtFileCount, tFile))
81 | n += 1
82 | csr = conn.cursor()
83 | sql = "BULK INSERT [stage table view] FROM '" + tFile + "' WITH (FIELDTERMINATOR = '|', ROWTERMINATOR = '0x0a', FIRSTROW = 2)"
84 | csr.execute(sql)
85 | conn.commit()
86 | csr.close()
87 | except Exception as e:
88 | print(e)
89 |
90 | try:
91 | print("Deleting {} file".format(tFile))
92 | if os.path.isfile(tFile):
93 | os.remove(tFile)
94 | except Exception as e:
95 | print(e)
96 |
97 | # Complete SQL Server processing
98 | try:
99 | print("Completing SQL Server update")
100 | print("Creating index on SQL table")
101 | csr = conn.cursor()
102 | csr.execute("CREATE NONCLUSTERED INDEX [index name] ON [stage table]([column name])")
103 | print("Completing SQL Server update")
104 | conn.commit()
105 | csr.close()
106 | conn.close()
107 | except Exception as e:
108 | print(e)
109 |
110 | # Complete
111 | print("COMPLETE: Process Reload from Archive")
--------------------------------------------------------------------------------
/Sample Scripts/02 Python/Jupyter Notebooks/PotentialMDMProcess.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 18,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stdout",
10 | "output_type": "stream",
11 | "text": [
12 | "Connecting to SQL Server database\n"
13 | ]
14 | }
15 | ],
16 | "source": [
17 | "import sys\n",
18 | "import pandas as pd\n",
19 | "import pyodbc as db\n",
20 | "import hdbscan\n",
21 | "from sklearn.datasets import make_blobs\n",
22 | "\n",
23 | "\n",
24 | "try:\n",
25 | " print(\"Connecting to SQL Server database\")\n",
26 | " connection_string = 'DSN=ETL;'\n",
27 | " conn = db.connect(connection_string)\n",
28 | " csr = conn.cursor()\n",
29 | " sql = \"SELECT FirstName FROM DimCustomer\"\n",
30 | " df1 = pd.read_sql(sql,conn)\n",
31 | " conn.commit()\n",
32 | " csr.close()\n",
33 | " conn.close()\n",
34 | "except:\n",
35 | " sys.exit(\"ERROR: Unable to query table or write file\")"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 19,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": [
44 | "from sklearn.feature_extraction.text import CountVectorizer\n",
45 | "from sklearn import model_selection, preprocessing, linear_model, naive_bayes, metrics, svm\n",
46 | "from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer\n",
47 | "from sklearn import decomposition, ensemble\n",
48 | "\n",
49 | "vectorizer = CountVectorizer()\n",
50 | "X = vectorizer.fit_transform(df1['FirstName'])\n",
51 | "clusterer = hdbscan.HDBSCAN(min_cluster_size=10)\n",
52 | "cluster_labels = clusterer.fit_predict(X)"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 16,
58 | "metadata": {},
59 | "outputs": [
60 | {
61 | "data": {
62 | "text/plain": [
63 | ""
64 | ]
65 | },
66 | "execution_count": 16,
67 | "metadata": {},
68 | "output_type": "execute_result"
69 | },
70 | {
71 | "data": {
72 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAADxCAYAAAAjibd7AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAbKklEQVR4nO3df5xldX3f8debZSnKD+lDVh9mYQQNiJhKQwfQxkTAoAvVUvwRUKsJilsasI2xBB4xyqz2h0rSqEFcJ4iIJtIqBBZd2fqzYHFlFxOXH4pdMYUFG3ZBheIP2Jl3/zhn8O4wc8+Zu/fcOXfu+/l4nMfMOef7Ped7lwef+53vT9kmIiJGyx6LXYCIiBi8BP+IiBGU4B8RMYIS/CMiRlCCf0TECErwj4gYQQn+EREDIukySfdLum2e+5L0IUlbJW2RdHRTZUnwj4gYnMuBVV3unwwcVh6rgY80VZAE/4iIAbF9A/BglySnAle4sBE4QNIzmijLnk08tEmrVq3y9ddfv9jFiIjhoN19wMtO2McPPDhVK+0tW35xO/DzjkuTticX8LqVwD0d59vKaz9cwDNqGbrgv2PHjsUuQkSMkAcenOLmDWO10i57xv/+ue3x3XjdXF9WjazBM3TBPyJikAxMMz2o120DDu44Pwi4r4kXpc0/IqILYx7zVK2jD9YBbyxH/bwA+Intvjf5QGr+ERGV+lXzl/Rp4HjgQEnbgAuB5QC21wLrgVOArcBPgTP78uI5JPhHRHRhzFSflr63/dqK+wbO6cvLKiT4R0RUmG6mz3VRJfhHRHRhYCrBPyJi9CzFmn9jo32q1rDoSHeMpClJr26qLBERvTLwmF3rGCZN1vwvBy4GrpgvgaRlwPuADU0V4rTjX8M1//OzTT0+IgboXecdwZr3f2eg7zReks0+jdX8a6xhAfBW4Crg/qbK8f9uANtMbT+Nqe2nYZvf1quxvcvv/bjW1HNnX+v8LDO/d16bK92wftaqzz+I97fls47Cf+uZn3N91qntp3HhHz23qVAxP8NUzWOYLNokL0krgdOAtTXSrpa0WdLm7du3N1+4iIhSMcO33jFMFnOG7weA8+3qaXG2J22P2x5fsWLFAIoWETFDTNU8hslijvYZB66UBHAgcIqknbavWcQyRUTsoujwHa7AXseiBX/bh878Luly4HMJ/BHRNsU4/wT/2mqsYRERMRSmU/OvzxVrWMxK+3tNlSMiYnek5h8RMYKMmFqCq98n+EdEVEizT0TEiDHiUS9b7GL0XYJ/REQXxSSvNPtERIycdPhGRIwYW0w5Nf+IiJEznZp/RMRoKTp8l16oXHqfKCKij9LhGxExoqYyzj8iYrRkhm9ExIiazmifiIjRUizsluAfETFSjHgsyztERIwWm0zyiogYPcokr4iIUWNS84+IGEnp8I2IGDFGS3Izl8a+ziRdJul+SbfNc//1kraUx02SjmqqLBERvTLwmPesdQyTJv+WuRxY1eX+D4AX234+8B5gssGyRET0SEzVPIZJY19Vtm+QdEiX+zd1nG4EDmqqLBERvTKZ4dukNwNfmO+mpNXAaoCxsbFBlSkiAliaO3kt+teZpBMogv/586WxPWl73Pb4ihUrBle4iBh5tpj2HrWOKpJWSbpT0lZJF8xx/ymSrpP0bUm3SzqzkQ/FItf8JT0fuBQ42fYDi1mWiIi5FB2+u7+8g6RlwIeBk4BtwCZJ62zf0ZHsHOAO26+QtAK4U9Jf2X50twswy6IFf0ljwNXAG2x/b7HKERHRXd/28D0W2Gr7LgBJVwKnAp3B38B+kgTsCzwI7OzHy2drLPhL+jRwPHCgpG3AhcByANtrgXcBTwUuKT4nO22PN1WeiIheFB2+tdv8D5S0ueN80vbMSMaVwD0d97YBx83KfzGwDrgP2A843fb0ggtdQ5OjfV5bcf8s4Kym3h8R0S8LmOG7o0sldq5vEM86fxnwd8CJwLOBL0q60fZDdQtQ16J3+EZEtNnMDN86R4VtwMEd5wdR1PA7nQlc7cJWivlQR/Ttw3RI8I+IqDDNHrWOCpuAwyQdKmkv4AyKJp5OdwMvAZD0dOA5wF19/jhAe8b5R0S0kg2PTe9+Pdn2TknnAhuAZcBltm+XdHZ5fy3FageXS7qVopnofNs7dvvlc0jwj4joomj26U8jie31wPpZ19Z2/H4f8NK+vKxCgn9ERIWlOMM3wT8ioosFDvUcGgn+ERFd9a/Zp00S/CMiKmQP34iIEVOM9tn9tX3aJsE/IqKLpbqNY4J/RESFNPtERIyYto72kbQ38HLgN4FfAX4G3AZ83vbtVfkT/CMiKrRttI+kCeAVwNeAbwL3A3sDhwPvLb8Y3m57y3zPSPCPiOjCFjtbFvyBTbYn5rn3XyU9Dei6522Cf0REhbY1+9j+/OxrkvYA9rX9kO37Kf4amFfrvs4iItpkps2/D0s6952kv5a0v6R9KHYEu1PSeXXyJvhHRFRoa/AHjiw3evlXFAvGjQFvqJMxzT4REV20fJz/cknLKYL/xbYfK7fFrZSaf0REhWlU61gEHwX+HtgHuEHSM4Gf1MnYWPCXdJmk+yXdNs99SfqQpK2Stkg6uqmyRET0yoad03vUOhbBdbZX2j7Ftil2AntTnYxNlvZyYFWX+ycDh5XHauAjDZYlIqJnLW7zv6rzpPwCuLJOxsba/G3fIOmQLklOBa4oC7tR0gGSnmH7h02VKSJiodrY5i/pCOB5wFMkvbLj1v4Uk70qLWaH70rgno7zbeW1JwR/Sasp/jpgbKzrvIWIiL5zy4I/xcbuLwcOoJjpO+Nh4C11HrCYwX+uf03PldD2JDAJMD4+PmeaiIimtG1hN9vXAtdKeqHtb/TyjMUM/tuAgzvODwLuW6SyRETMyW7fDN8OWyX9MXAIHfHcdmWn72IG/3XAuZKuBI4DfpL2/ohoHzG1OCN56rgWuBH4EjC1kIyNBX9JnwaOBw6UtA24EFgOYHstxWy0U4CtwE+BM5sqS0TE7mhhm/+MJ9s+v5eMTY72eW3FfQPnNPX+iIh+aOt6/qXPSTrF9vqFZmzt3zIREa3got2/zrEI/j3FF8DPJD0k6WFJD9XJmLV9IiIqtG20zwzb+/WaN8E/IqILt7DDV9IRtr8737I4tr9V9YwE/4iICovUpNPNH1JMfP2zOe4ZOLHqAQn+EREV2jbax/bq8ucJvT4jwT8ioouiM7ddwX9GuZb/vwV+q7z0NeCjth+rypvgHxFRocVDPT9CMX/qkvL8DeW1s6oyJvhHRFRoYZv/jGNsH9Vx/hVJ366TMcE/IqILI6ZbNtqnw5SkZ9v+PoCkZ1FzmYcE/4iICu2t+HMe8FVJd1GslPxMai6Vk+AfEdFNHzt8Ja0CPggsAy61/d450hwPfICiLX+H7RfPWzT7y5IOo1jfX8B3bf+iTlkS/CMiqvSh6i9pGfBh4CSKJe03SVpn+46ONAdQdN6usn23pKdVPHNv4PeBF5WlvFHSWts/rypPaxuyIiLawlat
o8KxwFbbd9l+lGKv3VNnpXkdcLXtu4v3+v6KZ15BsZ3jXwAXA0cCn6zzmVLzj4jowsD0dO1mnwMlbe44nyx3IoS5t649blb+w4Hlkr4G7Ad80PYVXd73nFmjfb6a0T4REf1goH6b/w7b4/Pcq7N17Z7APwNeAjwJ+Iakjba/N88z/1bSC2xvBJB0HPC/6hQ0wT8iokKfxvnX2bp2G8UXyCPAI5JuAI4C5gv+xwFvlHR3eT4GfEfSrRTbpjx/vsIk+EdEVOlP8N8EHCbpUOBe4AyKNv5O1wIXS9oT2IsiuP95l2eu6rUwtYK/pMMppgw/3favSXo+8C9t/8deXxwRMRxqdeZWsr1T0rnABoqhnpfZvl3S2eX9tba/I+l6YAswTTEc9LYuz/w/vZanbs3/LykmE3y0fOEWSX8NJPhHxNLXp1le5XaL62ddWzvr/CLgov68cX51h3o+2fbNs67trMokaZWkOyVtlXTBHPefIuk6Sd+WdLukbOIeEe1i8LRqHYMi6R/t7jPqBv8dkp5N+f0n6dXAD7tl6JjQcDLF2NPXSjpyVrJzgDvKoUrHA38maa/6xY+IGATVPAbmGwCSao3pn0vdZp9zgEngCEn3Aj8A/nVFnscnNJSFnJnQcEdHGgP7SRKwL/AgNf6iiIgYqPYt7rOXpN8F/rmkV86+afvqqgfUCv5lAP9tSfsAe9h+uEa2OhMaLgbWUQx32g843fb07AdJWk2xZRljY2N1ihwR0T/tC/5nA68HDgBeMeuegf4Ef0n/GXi/7R+X5/8YeLvtP+mWbY5rs/8JXwb8HcV+k88GvijpRtsP7ZKpmCE3CTA+Pt6+/wwRsXQtbJLXQNj+OvB1SZttf6yXZ9Rt8z95JvCXL/4RcEpFnjoTGs6kWMfCtrdSNCcdUbNMEREDUWzlWH0sgk9K+neSPlseby23dqxUN/gv6+xdlvQkoKq3+fEJDWUn7hkUTTyd7qaYxoykp1MsS3pXzTJFRAzGtOodg3cJxXIQl5TH0RRzsirV7fD9FPBlSR+n+CPoTcAnumWoM6EBeA9weTkVWcD5tnfULFNExECovY3NzW7jaPv9ZYB+CUWQfo/tDTXydZ3QYPs+4KV1yhARsShMGzt8ZzS/jaPtLwBf6K18ERHDSq3r8O3Q7DaO5TjS9wFP45ezGWx7/56KGxExTFpa8x/ENo7vB15h+zs9ljEiYng9YfZRe5TBfstC89UN/v+QwB8RI6mF4/z7oW7w3yzpvwHXAI//SVFnCnFExLBr42ifclmcg2zfU5l4DnWD//7AT9l1ZE6tKcQREUOvhcHftiVdQzHOf8HqDvXMUssREe2zUdIxtjctNGPd0T57A28GngfsPXPd9psW+sKIiGHTxmaf0gnA2ZL+HniEX47EnHfv3hl1m30+CXyXYiG2d1OsJpcO4IhY+sxiLd1Qx8m9Zqy7ts+v2n4n8IjtTwD/Avgnvb40ImKouOYx6GIVe/geDJxY/v5Tasb1ujX/x8qfP5b0a8D/BQ5ZYDkjIoZSW5t9JF0IjFNM8vo4sJxiLbbfqMpbN/hPlmv4/wnFypz7Au/sqbQREcOmpcEfOA34deBbUKyXJmm/OhnrBv8vl2v43wA8C0DSoT0UNCJi+LQ3+D9aDvmc2V99n7oZ67b5XzXHtc/WfUlExLCS6x+L4L9L+ihwgKS3AF8C/rJOxq41f0lHUAzvfMqsTYL3p2PIZ0TEktbS0T62/1TSScBDwOHAu2x/sU7eqmaf5wAv54mbBD8MvKWHskZEDJ22dviWbgWeRNE4dWvdTF2Dv+1rgWslvdD2N3avfBERQ6qlwV/SWcC7gK9QTPD6C0nvtn1ZVd66Hb6nSbod+BlwPXAU8Ae2P9VjmSMihsPitefXcR7w67YfAJD0VOAmoDL41+3wfanthyiagLZRtC2d11tZIyKGTEsneVHE44c7zh8Gaq3yWbfmv7z8eQrwadsPFquJdidpFfBBig3cL7X93jnSHA98oHzHDtsvrlmmiIiBUMs2c5H0h+Wv9wLflHQtxdfPqcDNdZ5RN/hfJ+m7FM0+vy9pBfDzisItAz4MnETx7bRJ0jrbd3SkOQC4BFhl+25JT6tZnoiIUTYzkev75THj2roPqLuk8wWS3gc8ZHtK0iMU3zDdHAtstX0XgKQryzx3dKR5HXC17bvL99xft+AREQPTsjZ/22t29xlV4/xPtP2VzjH+s5p7um3mspJd2562AcfNSnM4sFzS1yi+yT5o+4o5yrEaWA0wNjbWrcgREf3V4g5fSePAO4Bn0hHP+7Gk829RDCF6BcV3n2b97Bb85+oUmP1PuCfFLjQvoRin+g1JG21/b5dM9iQwCTA+Pt7S/wwRsWS1N+r8FcXgm1tZ4DbzVcH/4bJj4TZ+GfSh3j/FNoqlRmccBNw3R5odth8BHpF0A8Uw0u8REdEW7Q3+222v6yVjVfDft/z5HOAYis4EUfwlcENF3k3AYeUCcPcCZ1C08Xe6FrhY0p7AXhTNQn9eu/QREQ0T7Rvt0+FCSZcCXwZ+MXPRduX+6lUzfNcASPofwNG2Hy7PJ4DPVOTdKelcYAPFUM/LbN8u6ezy/lrb35F0PbCF4k+WS23fVlXoiIiB6WObf53h72W6Y4CNwOm2uy2ieSZwBMVQ+ZmvqKomeaD+UM8x4NGO80epsZmL7fXA+lnX1s46vwi4qGY5IiIGrw/Bv87w945076OoOFc5ynZPuyouZA/fmyX9DcU/w2nAJ3p5YUTE0OlPzb/O8HeAt1Iso39MjWdulHTk7C+QOuqO8/9Pkr4A/GZ56Uzbf7vQl0VEDKMFNPscKGlzx/lkOVoRagx/l7SSonJ9IvWC/4uA35X0A4o2fwHux1DPx9n+FuVWYRERI6V+8N9he3yee3WGv38AOL+cTFvnfatql2yW2sE/ImIkuW+jfeoMfx8HriwD/4HAKZJ22r5m/tL1JsE/IqJKf9r8K4e/2358b3RJlwOf6xL4AT7PL+dg7Q0cCtxJsQNjVwn+EREV+jHUs87w9x6euctIH0lHA/+mTt4E/4iIKn0a519n+HvH9d/r4fnfKucIVErwj4joZvE2aqnUsa4/FJtzHQ1sr5M3wT8iogvR3lU9+eW6/gA7KfoArqqTMcE/IqJCW4P/7qzrn+AfEVGlZcFf0seZv1S2/eaqZyT4R0RUaVnwBz43x7Ux4A8oRhJVSvCPiOimhTt52X68XV/Ss4A/pth8673Ax+o8Y49mihYRsYS45jFAkp4r6VPAdcDXgSNtf8T2oxVZgdT8IyIqtW0zF0mfoVgK4k+BtwFTwP4z6wHZfrDqGQn+EREV2tbsQ7Hip4H/ALy9vNa5ze6zqh6Q4B8R0U0LJ3nZPmR3n5E2/4iIKi1r85d0SMV9STqoW5rU/CMiumjpDN+LJO0BXAvcQrGkw97ArwInAC8BLqRYRnpOjdb8Ja2SdKekrZIu6JLuGElTkl7dZHkiInqhadc6BsX2a4B3As+h2Bf4RmAd8BaKJZ1PtP3Fbs9orObf0GbFERGD1cI2f4Aylr6j1/xNNvs0sVl
xRMTAtbDZBwBJr5zj8k+AW23f3y1vk8G/b5sVS1oNrAYYGxvre0EjIrpqafAH3gy8EPhqeX48sBE4XNK7bX9yvoxNtvkvaLPibg+yPWl73Pb4ihUr+lbAiIg65HrHIpgGnmv7VbZfBRwJ/IKion1+t4xN1vyb2Kw4ImLw2lvzP8T2P3Sc3w8cbvtBSY91y9hk8G9is+KIiMFy+5Z36HCjpM8BnynPXw3cIGkf4MfdMjYW/JvYrDgiYtBaOs5/xjnAK4EXURT1E8BVtk0x3n9ejU7yanqz4oiIgXA7o79tS/o68ChF49TNZeCvlOUdIiIqtLXDV9LvADdTNPf8DvDNupNls7xDREQ3LZ3kVXoHcMzMmH5JK4AvAZ+typjgHxFRocUdvnvMmsz1ADVbdBL8IyIqtDj4Xy9pA/Dp8vx0ZvWzzifBPyKiG9PmDt/zJL0K+A2K0T6Ttv+mTt4E/4iICi0e6jmzmftVlQlnSfCPiKjSsuAv6WHmLpUoRoDuX/WMBP+IiC7aOMnL9n67+4wE/4iIbjzYjVoGJcE/IqLK0ov9Cf4REVXa1uzTDwn+ERHdGEizT0TECFp6sT8Lu0VEVOnXwm6SVkm6U9JWSRfMcf/1kraUx02Sjmri80Bq/hERlfox2kfSMuDDwEkUOx1ukrTO9h0dyX4AvNj2jySdDEwya+/zfknNPyKiGy/g6O5YYKvtu2w/ClwJnLrLq+ybbP+oPN1Isf1tI1Lzj4joopjkVbvmf6CkzR3nk7Yny99XAvd03NtG91r9m4Ev1H3xQiX4R0RUqb+q5w7b4/Pc0xzX5vxWkXQCRfB/Ue03L1CCf0REhQXU/LvZBhzccX4QcN8T3iU9H7gUONn2A/148VwabfNvU892RERP+tfmvwk4TNKhkvYCzgDWdSaQNAZcDbzB9vf69yGeqLGaf9t6tiMietOftX1s75R0LrABWAZcZvt2SWeX99cC7wKeClwiCWBnl2ak3dJks8/jPdsAkmZ6th8P/rZv6kjfaM92RETP+rSZi+31zNppqwz6M7+fBZzVl5dVaLLZZ66e7ZVd0s/bsy1ptaTNkjZv3769j0WMiKjgYhvHOscwaTL499Kzff5c921P2h63Pb5ixYo+FjEioga73jFEmmz2aVXPdkREz4YrrtfSZPB/vGcbuJeiZ/t1nQkG2bMdEdErTQ9Zm04NjQX/tvVsR0T0xCxkktfQaHSSV5t6tiMieiHcr0lerZIZvhERVRL8IyJGUIJ/RMSISZt/RMRoymifiIiRM3wTuOpI8I+I6MYk+EdEjKSl1+qT4B8RUSXj/CMiRlGCf0TEiLFhaum1+yT4R0RUSc0/ImIEJfhHRIwYA33Yw7dtEvwjIroyOG3+ERGjxaTDNyJiJKXNPyJiBCX4R0SMmizsFhExegwswSWd92jy4ZJWSbpT0lZJF8xxX5I+VN7fIunoJssTEdETu94xRBqr+UtaBnwYOAnYBmyStM72HR3JTgYOK4/jgI+UPyMiWmJpLu/QZM3/WGCr7btsPwpcCZw6K82pwBUubAQOkPSMBssUEbEwBnu61jFMlk1MTDTy4DVr1rwAeNrExMS68vxQ4LkTExPrO9KcDayfmJi4uzw/DfjmxMTEfZ3PkrR6zZo1H12zZs3q5cuX/8rb3va2+gUxHHX889CTT0dPPn2Xa7v83o9rTT131rV/esq7H/8sM59rl89Xpu9MN6yfterzD+T9LfmsI/Hfetb/r52f9Qmfu541C80w23vffdHEwXsdUW7q0v34/s+/9cOJiYnJ3X3nIMgNtVNJeg3wMttnledvAI61/daONJ8H/ovtr5fnXwb+yPYt8z13fHzcmzdvbqTMEbHkaHcf8JQ9V/iF+81utJjbhh9/7Bbb47v7zkFocrTPNuDgjvODgPt6SBMRsXjsjPZZoE3AYZIOlbQXcAawblaadcAby1E/LwB+YvuHDZYpImLhMtqnPts7JZ0LbACWAZfZvl3S2eX9tcB64BRgK/BT4MymyhMR0RvjqanFLkTfNTrJy/Z6igDfeW1tx+8GzmmyDBERu8VkSeeIiJE0ZMM462h0hm9ExLAz4GnXOqq0adWDBP+IiG7souZf5+iiY9WDk4EjgddKOnJWss5VD1ZTrHrQiAT/iIgKnpqqdVRo1aoHQ9fmf8stt2wAVi12OSJiNDzMjzZ8yZ89sGbyvSV1zkKdtD0z43clcE/HvW08cS2zudKsBPo+BH7ogr/tBP6IGJg+xpy5ZhvP7iiok6Yv0uwTETEYrVr1IME/ImIwWrXqwdA1+0REDKO2rXrQ2KqeERHRXmn2iYgYQQn+EREjKME/ImIEJfhHRIygBP+IiBGU4B8RMYIS/CMiRtD/B+2FgK+jHrdwAAAAAElFTkSuQmCC\n",
73 | "text/plain": [
74 | ""
75 | ]
76 | },
77 | "metadata": {
78 | "needs_background": "light"
79 | },
80 | "output_type": "display_data"
81 | }
82 | ],
83 | "source": [
84 | "clusterer = hdbscan.RobustSingleLinkage(cut=0.125, k=7)\n",
85 | "cluster_labels = clusterer.fit_predict(X)\n",
86 | "hierarchy = clusterer.cluster_hierarchy_\n",
87 | "alt_labels = hierarchy.get_clusters(0.100, 5)\n",
88 | "hierarchy.plot()"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": 20,
94 | "metadata": {},
95 | "outputs": [
96 | {
97 | "data": {
98 | "text/plain": [
99 | "array([ 82, 83, 122, ..., 444, 386, 43], dtype=int64)"
100 | ]
101 | },
102 | "execution_count": 20,
103 | "metadata": {},
104 | "output_type": "execute_result"
105 | }
106 | ],
107 | "source": [
108 | "cluster_labels"
109 | ]
110 | }
111 | ],
112 | "metadata": {
113 | "kernelspec": {
114 | "display_name": "Python 3",
115 | "language": "python",
116 | "name": "python3"
117 | },
118 | "language_info": {
119 | "codemirror_mode": {
120 | "name": "ipython",
121 | "version": 3
122 | },
123 | "file_extension": ".py",
124 | "mimetype": "text/x-python",
125 | "name": "python",
126 | "nbconvert_exporter": "python",
127 | "pygments_lexer": "ipython3",
128 | "version": "3.8.3"
129 | }
130 | },
131 | "nbformat": 4,
132 | "nbformat_minor": 4
133 | }
134 |
--------------------------------------------------------------------------------
/Sample Scripts/03 PySpark/ConnectToSpark.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from pyspark.sql import SparkSession\n",
10 | "\n",
11 | "spark = SparkSession \\\n",
12 | " .builder \\\n",
13 | " .appName(\"Python Spark SQL basic example\") \\\n",
14 | " .master(\"spark://[YourServerIP]\") \\\n",
15 | " .getOrCreate()\n",
16 | "\n",
17 | "from pyspark import SparkConf, SparkContext\n",
18 | "from pyspark.sql import SparkSession, SQLContext\n",
19 | "sc = SparkContext()\n",
20 | "sqlq = SQLContext(sc)"
21 | ]
22 | }
23 | ],
24 | "metadata": {
25 | "kernelspec": {
26 | "display_name": "Python 3",
27 | "language": "python",
28 | "name": "python3"
29 | },
30 | "language_info": {
31 | "codemirror_mode": {
32 | "name": "ipython",
33 | "version": 3
34 | },
35 | "file_extension": ".py",
36 | "mimetype": "text/x-python",
37 | "name": "python",
38 | "nbconvert_exporter": "python",
39 | "pygments_lexer": "ipython3",
40 | "version": "3.8.3"
41 | }
42 | },
43 | "nbformat": 4,
44 | "nbformat_minor": 4
45 | }
46 |
--------------------------------------------------------------------------------
/Sample Scripts/03 PySpark/ExampleDataProcessing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from pyspark import SparkConf, SparkContext\n",
10 | "from pyspark.sql import SparkSession, SQLContext\n",
11 | "import pyspark.sql.functions as func\n",
12 | "from pyspark.sql.types import *\n",
13 | "sc = SparkContext()\n",
14 | "sqlq = SQLContext(sc)\n",
15 | "import os"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": 2,
21 | "metadata": {},
22 | "outputs": [],
23 | "source": [
24 | "rdf = sqlq.read.format(\"csv\").option(\"header\",\"true\").option(\"delimiter\",\"|\").option(\"quote\",\"^\").load(\"[Path To Your File]\")"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 3,
30 | "metadata": {},
31 | "outputs": [],
32 | "source": [
33 | "tnpdf = sqlq.read.format(\"csv\").option(\"header\",\"true\").option(\"delimiter\",\"|\").option(\"quote\",\"^\").load(\"[Path To Your File]\")"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 4,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "rdf.count()"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 7,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "rdf = rdf.filter(rdf.[YourColumn] != '')"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 5,
57 | "metadata": {},
58 | "outputs": [],
59 | "source": [
60 | "rdf = rdf.filter(rdf.[YourColumn1] != 'SomeValue').filter(rdf.[YourColumn2] != 'SomeOtherValue')"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 11,
66 | "metadata": {},
67 | "outputs": [],
68 | "source": [
69 | "ExcludeList = ['Value1','Value2',...'ValueN']\n",
70 | "rdf = rdf.filter(rdf.[YourColumnName].isin(ExcludeList) == False)"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 13,
76 | "metadata": {},
77 | "outputs": [],
78 | "source": [
79 | "joined = rdf.join(tnpdf,func.lower(rdf.[YourJoinColumn]) == func.lower(tnpdf.[YourJoinColumn]), how=\"left\")"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 15,
85 | "metadata": {},
86 | "outputs": [],
87 | "source": [
88 | "no[YourBooleanColumn] = joined.drop('[YourBooleanColumn]')"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": 16,
94 | "metadata": {},
95 | "outputs": [],
96 | "source": [
97 | "no[YourBooleanColumn].select(no[YourBooleanColumn].[YourColumnName]).show(n=20)"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": 17,
103 | "metadata": {},
104 | "outputs": [],
105 | "source": [
106 | "CheckFor[YourBooleanColumn] = func.udf(lambda x: 'No' if x is None else 'Yes', StringType())"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 18,
112 | "metadata": {},
113 | "outputs": [],
114 | "source": [
115 | "with[YourBooleanColumn] = no[YourBooleanColumn].withColumn('[YourBooleanColumn]',func.lit(CheckFor[YourBooleanColumn](joined.[YourColumnName])))"
116 | ]
117 | },
118 | {
119 | "cell_type": "code",
120 | "execution_count": 19,
121 | "metadata": {},
122 | "outputs": [],
123 | "source": [
124 | "with[YourBooleanColumn].columns"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 22,
130 | "metadata": {},
131 | "outputs": [],
132 | "source": [
133 | "CleanedResults.select('*').where(CleanedResults.[YourColumnName]=='FilterValue').show(n=20)"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": 23,
139 | "metadata": {},
140 | "outputs": [],
141 | "source": [
142 | "CleanedResults.repartition(1).write.option('nullValue',None).csv('[Path To Your Output File]', sep='|',header='true',mode='overwrite')"
143 | ]
144 | }
145 | ],
146 | "metadata": {
147 | "kernelspec": {
148 | "display_name": "Python 3",
149 | "language": "python",
150 | "name": "python3"
151 | },
152 | "language_info": {
153 | "codemirror_mode": {
154 | "name": "ipython",
155 | "version": 3
156 | },
157 | "file_extension": ".py",
158 | "mimetype": "text/x-python",
159 | "name": "python",
160 | "nbconvert_exporter": "python",
161 | "pygments_lexer": "ipython3",
162 | "version": "3.8.3"
163 | }
164 | },
165 | "nbformat": 4,
166 | "nbformat_minor": 4
167 | }
168 |
--------------------------------------------------------------------------------
/Sample Scripts/03 PySpark/LoadCSV.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from pyspark.sql import SparkSession\n",
10 | "\n",
11 | "spark = SparkSession \\\n",
12 | " .builder \\\n",
13 | " .appName(\"Python Spark SQL basic example\") \\\n",
14 | " .master(\"spark://spark://[YourServerIP]\") \\\n",
15 | " .getOrCreate()"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": 1,
21 | "metadata": {},
22 | "outputs": [],
23 | "source": [
24 | "from pyspark import SparkConf, SparkContext\n",
25 | "from pyspark.sql import SparkSession, SQLContext\n",
26 | "sc = SparkContext()\n",
27 | "sqlq = SQLContext(sc)"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 15,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "tnpdf = sqlq.read.format(\"csv\").option(\"header\",\"true\").option(\"delimiter\",\"|\").load(\"[Path To Your CSV]\")"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": 27,
42 | "metadata": {},
43 | "outputs": [
44 | {
45 | "data": {
46 | "text/plain": [
47 | "3057038"
48 | ]
49 | },
50 | "execution_count": 27,
51 | "metadata": {},
52 | "output_type": "execute_result"
53 | }
54 | ],
55 | "source": [
56 | "rdf.count()"
57 | ]
58 | }
59 | ],
60 | "metadata": {
61 | "kernelspec": {
62 | "display_name": "Python 3",
63 | "language": "python",
64 | "name": "python3"
65 | },
66 | "language_info": {
67 | "codemirror_mode": {
68 | "name": "ipython",
69 | "version": 3
70 | },
71 | "file_extension": ".py",
72 | "mimetype": "text/x-python",
73 | "name": "python",
74 | "nbconvert_exporter": "python",
75 | "pygments_lexer": "ipython3",
76 | "version": "3.8.3"
77 | }
78 | },
79 | "nbformat": 4,
80 | "nbformat_minor": 4
81 | }
82 |
--------------------------------------------------------------------------------
/Sample Scripts/04 Windows Batch/call Python from SQL Server agent.txt:
--------------------------------------------------------------------------------
1 | python c:\temp\YourPythonScript.py
2 |
3 | SET EXITCODE=%ERRORLEVEL%
4 | IF %EXITCODE% EQU 0 (
5 | REM Process completed successfully.
6 | EXIT 0
7 | )
8 | IF %EXITCODE% EQU 1 (
9 | REM Process error.
10 | EXIT 1
11 | )
12 | IF %EXITCODE% EQU 2 (
13 | REM Custom error message.
14 | EXIT 2
15 | )
--------------------------------------------------------------------------------