├── .gitignore ├── ETL Framework Template ├── ETL Framework Template.sln ├── ETL Framework Template.v12.suo └── ETL Framework Template │ ├── ETL Framework Template.database │ ├── ETL Framework Template.dtproj │ ├── ETL Framework Template.dtproj.user │ ├── ETLTemplate.dtsx │ ├── Project.params │ ├── bin │ └── Development │ │ └── ETL Framework Template.ispac │ └── obj │ └── Development │ ├── AgspringETLTemplate.dtsx │ ├── BuildLog.xml │ ├── ETL Framework Template.dtproj │ └── Project.params ├── Framework Deployment Scripts ├── 01 Create Initial Databases │ ├── 01 CREATE DATABASE EDW.sql │ ├── 02 CREATE DATABASE ODS.sql │ ├── 03 CREATE DATABASE Reporting.sql │ ├── 04 CREATE DATABASE SSISManagement.sql │ └── 05 CREATE INITIAL SCHEMAS.sql ├── 02 Date Dimension │ ├── 01 CREATE AND POPULATE DimMonth AND DimDate.sql │ ├── 02 add empty and error rows.sql │ └── 03 ALTER TABLE DimDate ADD FiscalColumns.sql ├── 03 Framework Objects │ ├── 01 Create ETL Framework Tables.sql │ ├── 02 CREATE TABLE TableLoadReport.sql │ ├── 03 Create ETL Framework Stored Procs.sql │ ├── 04 CREATE PROC usp_ErrorLogByPackage.sql │ ├── 05 CREATE PROC usp_BatchRunTime.sql │ ├── 06 CREATE PROC usp_ErrorLogByBatch.sql │ ├── 07 CREATE PROC usp_GetVariableValues.sql │ ├── 08 CREATE PROC PackageRunTime.sql │ ├── 09 CREATE TABLE SSISConfigurations.sql │ ├── 10 CREATE TABLE Tables.sql │ ├── 11 CREATE TABLE LoadObservations.sql │ ├── 12 CREATE VIEW RowCountLog.sql │ ├── 13 CREATE VIEW ShowAdverseTableLoads.sql │ ├── 14 CREATE VIEW SSISErrorLogByPackage.sql │ ├── 15 CREATE VIEW SSISPackageBatchRunTime.sql │ ├── 16 CREATE VIEW SSISPackageRunTime.sql │ └── 17 CREATE FUNCTION udf_CleanDate.sql └── 04 Python Package Installation │ └── InstallFrameworkPackages.py ├── LICENSE ├── README.md ├── Rapid Development Tools └── data model creation tool.xlsm ├── Sample Directory Structures ├── Code Repo │ ├── Python │ │ ├── global_config │ │ │ └── test_config.yaml │ │ └── process_name │ │ │ └── config │ │ │ └── test_config.yaml │ └── SQL │ │ ├── Support Objects │ │ ├── Functions │ │ │ └── .gitkeep │ │ └── Views │ │ │ └── .gitkeep │ │ └── Warehouse Load │ │ ├── 01 Pull Data │ │ └── .gitkeep │ │ ├── 02 Clean Data │ │ └── .gitkeep │ │ ├── 03 Process Dimensions │ │ └── .gitkeep │ │ ├── 04 Process Facts │ │ └── .gitkeep │ │ ├── 05 Finalize And Audit │ │ └── .gitkeep │ │ ├── 06 Populate Reporting Tables │ │ └── .gitkeep │ │ └── 07 Monitoring │ │ └── .gitkeep ├── File Processing And Storage │ └── InterfaceAndExtractFiles │ │ ├── Business Unit Name │ │ └── ETL Process Name │ │ │ ├── Archive │ │ │ └── .gitkeep │ │ │ ├── In │ │ │ └── .gitkeep │ │ │ └── Out │ │ │ └── .gitkeep │ │ └── Third Party Name │ │ └── ETL Process Name │ │ ├── Archive │ │ └── .gitkeep │ │ ├── In │ │ └── .gitkeep │ │ └── Out │ │ └── .gitkeep └── Warehouse File IO │ └── FileExchange │ ├── Business Unit Name │ └── ETL Process Name │ │ ├── In │ │ └── .gitkeep │ │ └── Out │ │ └── .gitkeep │ └── Third Party Name │ └── ETL Process Name │ ├── In │ └── .gitkeep │ └── Out │ └── .gitkeep └── Sample Scripts ├── 01 SQL ├── 01 Finalize And Audit Scripts │ ├── 18 CREATE PROC usp_RecordRowCounts.sql │ ├── 19 CREATE PROC usp_MarkRecordsAsProcessed.sql │ └── 20 CREATE PROC usp_CheckForUnprocessedRecords.sql ├── 02 Monitoring Scripts │ ├── 21 CREATE PROC usp_DisplayTablesNotLoading.sql │ ├── 22 CREATE PROC usp_LoadTableLoadReportingTable.sql │ └── 23 CREATE PROC usp_TableLoadMonitoring.sql ├── 03 Table Object Sample Scripts │ ├── sample dimension table.sql │ ├── sample fact table.sql │ ├── sample indexed 
view.sql │ ├── sample junk dimension table.sql │ ├── sample master data management table.sql │ └── sample stage table.sql ├── 04 Data Processing Sample Scripts │ ├── batch processing.sql │ ├── bulk insert.sql │ ├── data cleansing.sql │ ├── fact table load.sql │ ├── remove dups.sql │ ├── type I dimension processing.sql │ └── type II dimension processing.sql └── 05 Helper Scripts │ ├── add rowhash.sql │ ├── change collation.sql │ ├── configuration insert sample script.sql │ ├── documentation block.sql │ ├── populate fact table with fake data.sql │ ├── proc execution scripts.sql │ ├── show all columns in database.sql │ ├── troubleshooting.sql │ └── utils.sql ├── 02 Python ├── Building Blocks │ ├── CombineCSVsIntoOneFile.py │ ├── ConvertCommaToPipeDelimitedFile.py │ ├── ConvertExcelToCSV.py │ ├── LoopingOverFilesInADirectory.py │ ├── ProcessZipFile.py │ ├── QueryDatabaseAndWriteLargeFile.py │ ├── QueryDatabaseAndWriteSmallFile.py │ ├── SendEmail.py │ ├── StringMatching.py │ └── YAMLConfigImport.py ├── Full Solutions │ ├── DownloadMoveAndStoreDataCSV.py │ ├── LoadLargeCSVsIntoDataWarehouseStagingTables.py │ ├── MoveAndStoreDataExcel.py │ └── ReloadFromArchive.py └── Jupyter Notebooks │ ├── Passive Monitoring System Design Theory.ipynb │ └── PotentialMDMProcess.ipynb ├── 03 PySpark ├── ConnectToSpark.ipynb ├── ExampleDataProcessing.ipynb └── LoadCSV.ipynb └── 04 Windows Batch └── call Python from SQL Server agent.txt /.gitignore: -------------------------------------------------------------------------------- 1 | workspace.xml 2 | *.bak 3 | *.docx 4 | .idea/ 5 | config.yaml 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | pip-wheel-metadata/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | *.py,cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 101 | __pypackages__/ 102 | 103 | # Celery stuff 104 | celerybeat-schedule 105 | celerybeat.pid 106 | 107 | # SageMath parsed files 108 | *.sage.py 109 | 110 | # Environments 111 | .env 112 | .venv 113 | env/ 114 | venv/ 115 | ENV/ 116 | env.bak/ 117 | venv.bak/ 118 | 119 | # Spyder project settings 120 | .spyderproject 121 | .spyproject 122 | 123 | # Rope project settings 124 | .ropeproject 125 | 126 | # mkdocs documentation 127 | /site 128 | 129 | # mypy 130 | .mypy_cache/ 131 | .dmypy.json 132 | dmypy.json 133 | 134 | # Pyre type checker 135 | .pyre/ 136 | -------------------------------------------------------------------------------- /ETL Framework Template/ETL Framework Template.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 2013 4 | VisualStudioVersion = 12.0.21005.1 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{159641D6-6404-4A2A-AE62-294DE0FE8301}") = "ETL Framework Template", "ETL Framework Template\ETL Framework Template.dtproj", "{20A0B72C-EC8F-41DA-BBE9-729FD188BB40}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Development|Default = Development|Default 11 | EndGlobalSection 12 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 13 | {20A0B72C-EC8F-41DA-BBE9-729FD188BB40}.Development|Default.ActiveCfg = Development 14 | {20A0B72C-EC8F-41DA-BBE9-729FD188BB40}.Development|Default.Build.0 = Development 15 | EndGlobalSection 16 | GlobalSection(SolutionProperties) = preSolution 17 | HideSolutionNode = FALSE 18 | EndGlobalSection 19 | EndGlobal 20 | -------------------------------------------------------------------------------- /ETL Framework Template/ETL Framework Template.v12.suo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/ETL Framework Template/ETL Framework Template.v12.suo -------------------------------------------------------------------------------- /ETL Framework Template/ETL Framework Template/ETL Framework Template.database: -------------------------------------------------------------------------------- 1 |  2 | ETL Framework Template 3 | ETL Framework Template 4 | 0001-01-01T00:00:00Z 5 | 0001-01-01T00:00:00Z 6 | 0001-01-01T00:00:00Z 7 | Unprocessed 8 | 0001-01-01T00:00:00Z 9 | 10 | Default 11 | Unchanged 12 | 13 | -------------------------------------------------------------------------------- /ETL Framework Template/ETL Framework Template/ETL Framework Template.dtproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | Project 4 | 12.0.2430.0 5 | 9.0.1.0 6 | 
$base64$PFNvdXJjZUNvbnRyb2xJbmZvIHhtbG5zOnhzZD0iaHR0cDovL3d3dy53My5vcmcvMjAwMS9YTUxTY2hlbWEiIHhtbG5zOnhzaT0iaHR0cDovL3d3dy53My5vcmcvMjAwMS9YTUxTY2hlbWEtaW5zdGFuY2UiIHhtbG5zOmRkbDI9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDAzL2VuZ2luZS8yIiB4bWxuczpkZGwyXzI9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDAzL2VuZ2luZS8yLzIiIHhtbG5zOmRkbDEwMF8xMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDA4L2VuZ2luZS8xMDAvMTAwIiB4bWxuczpkZGwyMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDEwL2VuZ2luZS8yMDAiIHhtbG5zOmRkbDIwMF8yMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDEwL2VuZ2luZS8yMDAvMjAwIiB4bWxuczpkZGwzMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDExL2VuZ2luZS8zMDAiIHhtbG5zOmRkbDMwMF8zMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDExL2VuZ2luZS8zMDAvMzAwIiB4bWxuczpkZGw0MDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDEyL2VuZ2luZS80MDAiIHhtbG5zOmRkbDQwMF80MDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDEyL2VuZ2luZS80MDAvNDAwIiB4bWxuczpkd2Q9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vRGF0YVdhcmVob3VzZS9EZXNpZ25lci8xLjAiPg0KICA8RW5hYmxlZD5mYWxzZTwvRW5hYmxlZD4NCiAgPFByb2plY3ROYW1lPjwvUHJvamVjdE5hbWU+DQogIDxBdXhQYXRoPjwvQXV4UGF0aD4NCiAgPExvY2FsUGF0aD48L0xvY2FsUGF0aD4NCiAgPFByb3ZpZGVyPjwvUHJvdmlkZXI+DQo8L1NvdXJjZUNvbnRyb2xJbmZvPg== 7 | 8 | ETL Framework Template.database 9 | ETL Framework Template.database 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | {a9026253-42ad-4f4e-aa43-1ba25643a651} 18 | ETL Framework Template 19 | 1 20 | 0 21 | 0 22 | 23 | 24 | 2016-01-27T10:18:43.3891069-06:00 25 | Teletran1\Bob 26 | TELETRAN1 27 | 28 | 29 | AQAAANCMnd8BFdERjHoAwE/Cl+sBAAAAoxXkRWWSP0mQp0K92RmiqAAAAAACAAAAAAAQZgAAAAEAACAAAACK73t63Bg6yo9Ub6Fot2dL+mBlJ665CHhLzWUt2KJaqwAAAAAOgAAAAAIAACAAAAA3abbbQ6Y1z5NpX3/lTTeDtG3GIWYsZ9mfhFm+qwcc3JAAAAApeIQkmm3d95I1Nwi85t1m6zXyR6AszAaN0c/jUvFgxhjDw0gDQarrEvOUdjbhbNCHUH4wl9+PqB51jmK2Mx/N3jxIziw+aW+P3eNKdauBultxKdC/qbz9DmA4lZne5fDIiIQdTakPAWE31Oenfu/wySuzx42jxx4f5yweNHgwBApBXy1vLZ6Ys2hZfykOrjVAAAAA6UaIROxPH0CuqjQFCDtA81ZSx5R6qQS1hrcVJ1OyaV/bd2Al9m4twany8wIau6AAzEKspWNkiNXAbV30FRoXVg== 30 | 1 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | {C479EBAA-4361-4720-8947-D43C86591D2B} 42 | ETLTemplate 43 | 1 44 | 0 45 | 76 46 | 47 | 48 | {2ABBFB1B-C134-487A-9F1E-8B0E55598978} 49 | 8 50 | 51 | 52 | 1 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 0 64 | 0 65 | 0 66 | Data Source=[your server];Initial Catalog=EDW;Provider=SQLNCLI11.1;Integrated Security=SSPI;Auto Translate=False; 67 | 18 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 0 79 | 0 80 | 0 81 | EDW 82 | 18 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 0 94 | 0 95 | 1 96 | 18 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 0 108 | 0 109 | 0 110 | false 111 | 3 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 0 123 | 0 124 | 0 125 | [your server] 126 | 18 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 0 138 | 0 139 | 0 140 | 18 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 0 152 | 0 153 | 0 154 | Data Source=[your server];Initial Catalog=ODS;Provider=SQLNCLI11.1;Integrated Security=SSPI;Auto Translate=False; 155 | 18 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 0 167 | 0 168 | 0 169 | ODS 170 | 18 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 0 182 | 0 183 | 1 184 | 18 185 | 186 | 187 | 188 | 189 | 190 | 191 | 
192 | 193 | 194 | 195 | 0 196 | 0 197 | 0 198 | false 199 | 3 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 0 211 | 0 212 | 0 213 | [your server] 214 | 18 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 0 226 | 0 227 | 0 228 | 18 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 0 240 | 0 241 | 0 242 | Data Source=AgspringDW;Initial Catalog=SSISManagement;Provider=SQLNCLI11.1;Integrated Security=SSPI;Auto Translate=False; 243 | 18 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 0 255 | 0 256 | 0 257 | SSISManagement 258 | 18 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 0 270 | 0 271 | 1 272 | 18 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 0 284 | 0 285 | 0 286 | false 287 | 3 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 0 299 | 0 300 | 0 301 | AgspringDW 302 | 18 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 0 314 | 0 315 | 0 316 | 18 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | Development 330 | 331 | bin 332 | 333 | 334 | 335 | 336 | 337 | 338 | LastModifiedTime 339 | LastModifiedTime 340 | 2016-03-24T20:15:25.1880445Z 341 | 342 | 343 | 344 | 345 | 346 | -------------------------------------------------------------------------------- /ETL Framework Template/ETL Framework Template/ETL Framework Template.dtproj.user: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Development 6 | 7 | true 8 | 9 | 10 | false 11 | true 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /ETL Framework Template/ETL Framework Template/Project.params: -------------------------------------------------------------------------------- 1 |  2 | -------------------------------------------------------------------------------- /ETL Framework Template/ETL Framework Template/bin/Development/ETL Framework Template.ispac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/ETL Framework Template/ETL Framework Template/bin/Development/ETL Framework Template.ispac -------------------------------------------------------------------------------- /ETL Framework Template/ETL Framework Template/obj/Development/BuildLog.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ETL Framework Template 5 | 2016-04-06T20:56:34.9057414Z 6 | EncryptSensitiveWithUserKey 7 | 8 | 9 | 10 | AgspringETLTemplate.dtsx 11 | 2016-04-06T21:17:31.1938222Z 12 | EncryptSensitiveWithUserKey 13 | 14 | 15 | -------------------------------------------------------------------------------- /ETL Framework Template/ETL Framework Template/obj/Development/ETL Framework Template.dtproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | Project 4 | 12.0.2430.0 5 | 9.0.1.0 6 | 
$base64$PFNvdXJjZUNvbnRyb2xJbmZvIHhtbG5zOnhzZD0iaHR0cDovL3d3dy53My5vcmcvMjAwMS9YTUxTY2hlbWEiIHhtbG5zOnhzaT0iaHR0cDovL3d3dy53My5vcmcvMjAwMS9YTUxTY2hlbWEtaW5zdGFuY2UiIHhtbG5zOmRkbDI9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDAzL2VuZ2luZS8yIiB4bWxuczpkZGwyXzI9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDAzL2VuZ2luZS8yLzIiIHhtbG5zOmRkbDEwMF8xMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDA4L2VuZ2luZS8xMDAvMTAwIiB4bWxuczpkZGwyMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDEwL2VuZ2luZS8yMDAiIHhtbG5zOmRkbDIwMF8yMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDEwL2VuZ2luZS8yMDAvMjAwIiB4bWxuczpkZGwzMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDExL2VuZ2luZS8zMDAiIHhtbG5zOmRkbDMwMF8zMDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDExL2VuZ2luZS8zMDAvMzAwIiB4bWxuczpkZGw0MDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDEyL2VuZ2luZS80MDAiIHhtbG5zOmRkbDQwMF80MDA9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vYW5hbHlzaXNzZXJ2aWNlcy8yMDEyL2VuZ2luZS80MDAvNDAwIiB4bWxuczpkd2Q9Imh0dHA6Ly9zY2hlbWFzLm1pY3Jvc29mdC5jb20vRGF0YVdhcmVob3VzZS9EZXNpZ25lci8xLjAiPg0KICA8RW5hYmxlZD5mYWxzZTwvRW5hYmxlZD4NCiAgPFByb2plY3ROYW1lPjwvUHJvamVjdE5hbWU+DQogIDxBdXhQYXRoPjwvQXV4UGF0aD4NCiAgPExvY2FsUGF0aD48L0xvY2FsUGF0aD4NCiAgPFByb3ZpZGVyPjwvUHJvdmlkZXI+DQo8L1NvdXJjZUNvbnRyb2xJbmZvPg== 7 | 8 | ETL Framework Template.database 9 | ETL Framework Template.database 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | {a9026253-42ad-4f4e-aa43-1ba25643a651} 18 | ETL Framework Template 19 | 1 20 | 0 21 | 0 22 | 23 | 24 | 2016-01-27T10:18:43.3891069-06:00 25 | Teletran1\Bob 26 | TELETRAN1 27 | 28 | 29 | AQAAANCMnd8BFdERjHoAwE/Cl+sBAAAAKWVJ8+Y0R0qCCKJfAf2yYAAAAAACAAAAAAADZgAAwAAAABAAAAD5rTNyuIACjLuEANeM4VmBAAAAAASAAACgAAAAEAAAAGR7+ybDlWCudzg7LVSzRX2IAAAAoggauLnNDyTbLXJxRQ4sn6bIBQQwZvC+mIG09yeDyeywtsY/0NjGxf8VEIWZNulvNpxialsQuE7T4WNN7zsZH8VzMtLBY1HtU/CxsfU5iMrhhCHJt81tmY9FCeyX/RuOHdJGeJ0M1Q5TQMw1vyZrIWUW+ETxeC8UZpcdxrxHgvZhOp73W8z6vxQAAABhbvGUmrDBi4P4Cifs4vvXJ758rQ== 30 | 1 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | {860DCA67-C5C3-4BB6-A243-ED8FA812EE9B} 42 | AgspringETLTemplate 43 | 1 44 | 0 45 | 64 46 | 47 | 48 | {F3692D1E-2A6D-4B0F-B8B2-495D2A146F24} 49 | 8 50 | 51 | 52 | 1 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 0 64 | 0 65 | 0 66 | Data Source=AgspringDW;Initial Catalog=SSISManagement;Provider=SQLNCLI11.1;Integrated Security=SSPI;Auto Translate=False; 67 | 18 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 0 79 | 0 80 | 0 81 | SSISManagement 82 | 18 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 0 94 | 0 95 | 1 96 | 18 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 0 108 | 0 109 | 0 110 | false 111 | 3 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 0 123 | 0 124 | 0 125 | AgspringDW 126 | 18 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 0 138 | 0 139 | 0 140 | 18 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | Development 154 | 155 | bin 156 | 157 | 158 | 159 | 160 | 161 | 162 | LastModifiedTime 163 | LastModifiedTime 164 | 2016-03-24T20:15:25.1880445Z 165 | 166 | 167 | 168 | 169 | 170 | -------------------------------------------------------------------------------- /ETL Framework Template/ETL Framework Template/obj/Development/Project.params: -------------------------------------------------------------------------------- 1 | 2 | 
-------------------------------------------------------------------------------- /Framework Deployment Scripts/01 Create Initial Databases/01 CREATE DATABASE EDW.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 15Oct20 4 | -- Description: Create the data warehouse database. 5 | -- Directions for use: Replace [Your Data Warehouse Database Name] 6 | -- with the name specific to your EDW, run this script on all 7 | -- EDW related servers. 8 | -- ============================================= 9 | 10 | CREATE DATABASE [Your Data Warehouse Database Name] 11 | COLLATE Latin1_General_CS_AS -------------------------------------------------------------------------------- /Framework Deployment Scripts/01 Create Initial Databases/02 CREATE DATABASE ODS.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 15Oct20 4 | -- Description: Create the operational data store database. 5 | -- Directions for use: Simply run this script on all 6 | -- EDW related servers. 7 | -- ============================================= 8 | 9 | CREATE DATABASE ODS 10 | COLLATE Latin1_General_CS_AS -------------------------------------------------------------------------------- /Framework Deployment Scripts/01 Create Initial Databases/03 CREATE DATABASE Reporting.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 15Oct20 4 | -- Description: Create the Reporting database. 5 | -- Directions for use: Simply run this script on all 6 | -- EDW related servers. 7 | -- ============================================= 8 | 9 | CREATE DATABASE Reporting 10 | COLLATE Latin1_General_CS_AS -------------------------------------------------------------------------------- /Framework Deployment Scripts/01 Create Initial Databases/04 CREATE DATABASE SSISManagement.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 15Oct20 4 | -- Description: Create the SSISManagemt database. 5 | -- Directions for use: Simply run this script on all 6 | -- EDW related servers. 7 | -- ============================================= 8 | 9 | CREATE DATABASE [SSISManagement] 10 | COLLATE Latin1_General_CS_AS -------------------------------------------------------------------------------- /Framework Deployment Scripts/01 Create Initial Databases/05 CREATE INITIAL SCHEMAS.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 15Oct20 4 | -- Description: Create the universal schemas. 5 | -- Directions for use: Replace [Your Data Warehouse Database Name] 6 | -- with the name specific to your EDW, run this script on all 7 | -- EDW related servers. 
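-- Example (hypothetical database name): if your EDW database were named AcmeEDW,
-- the first statement below would read USE AcmeEDW before the dw schema is created;
-- the remaining schemas are created in ODS as written.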
8 | -- ============================================= 9 | 10 | USE [Your Data Warehouse Database Name] 11 | 12 | DROP SCHEMA IF EXISTS dw 13 | 14 | GO 15 | 16 | CREATE SCHEMA dw 17 | 18 | GO 19 | 20 | USE ODS 21 | 22 | DROP SCHEMA IF EXISTS cm 23 | 24 | GO 25 | 26 | CREATE SCHEMA cm 27 | 28 | GO 29 | 30 | DROP SCHEMA IF EXISTS rpt 31 | 32 | GO 33 | 34 | CREATE SCHEMA rpt 35 | 36 | GO 37 | 38 | DROP SCHEMA IF EXISTS vol 39 | 40 | GO 41 | 42 | CREATE SCHEMA vol 43 | 44 | GO 45 | 46 | DROP SCHEMA IF EXISTS mdm 47 | 48 | GO 49 | 50 | CREATE SCHEMA mdm 51 | 52 | GO 53 | 54 | DROP SCHEMA IF EXISTS ms 55 | 56 | GO 57 | 58 | CREATE SCHEMA ms -------------------------------------------------------------------------------- /Framework Deployment Scripts/02 Date Dimension/01 CREATE AND POPULATE DimMonth AND DimDate.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Framework Deployment Scripts/02 Date Dimension/01 CREATE AND POPULATE DimMonth AND DimDate.sql -------------------------------------------------------------------------------- /Framework Deployment Scripts/02 Date Dimension/02 add empty and error rows.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 19Oct20 4 | -- Description: Add empty and error records to date dimension 5 | -- Directions for use: Replace [YourDataWarehouseName] with the specific name of your EDW and run. 6 | -- ============================================= 7 | USE [YourDataWarehouseName] 8 | 9 | INSERT INTO dw.DimDate(DateCK, FormattedDate) 10 | SELECT '00000000', 'No Date' 11 | UNION ALL 12 | SELECT '11111111', 'Not A Date' -------------------------------------------------------------------------------- /Framework Deployment Scripts/02 Date Dimension/03 ALTER TABLE DimDate ADD FiscalColumns.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Framework Deployment Scripts/02 Date Dimension/03 ALTER TABLE DimDate ADD FiscalColumns.sql -------------------------------------------------------------------------------- /Framework Deployment Scripts/03 Framework Objects/01 Create ETL Framework Tables.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 20Oct20 4 | -- Description: This script builds the tables that are used to log SSIS package activity. 5 | -- Directions for use:Simply run the script. 
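-- Example query (illustrative only) showing how the tables below relate once SSIS starts logging:
--   SELECT p.PackageName, pl.StartDateTime, pl.EndDateTime, pl.[Status]
--   FROM dbo.PackageLog pl
--   JOIN dbo.PackageVersion pv ON pl.PackageVersionID = pv.PackageVersionID
--   JOIN dbo.Package p ON pv.PackageID = p.PackageID
--   ORDER BY pl.StartDateTime DESC;
-- The procs and views created later (usp_PackageRunTime, SSISPackageRunTime) wrap this same
-- join; the query is shown here only to clarify how the tables fit together.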
6 | -- ============================================= 7 | 8 | USE [SSISManagement] 9 | GO 10 | 11 | CREATE TABLE [dbo].[Package]( 12 | [PackageID] [int] IDENTITY(1,1) NOT NULL, 13 | [PackageGUID] [uniqueidentifier] NOT NULL, 14 | [PackageName] [varchar](255) NOT NULL, 15 | [CreationDate] [datetime] NOT NULL, 16 | [CreatedBy] [varchar](255) NOT NULL, 17 | [EnteredDateTime] [datetime] NOT NULL DEFAULT getdate(), 18 | CONSTRAINT [PK_Package] PRIMARY KEY CLUSTERED ( 19 | [PackageID] ASC 20 | )) 21 | GO 22 | 23 | CREATE TABLE [dbo].[PackageVersion]( 24 | [PackageVersionID] [int] IDENTITY(1,1) NOT NULL, 25 | [PackageVersionGUID] [uniqueidentifier] NOT NULL, 26 | [PackageID] [int] NOT NULL, 27 | [VersionMajor] [int] NOT NULL, 28 | [VersionMinor] [int] NOT NULL, 29 | [VersionBuild] [int] NOT NULL, 30 | [VersionComment] [varchar](1000) NOT NULL, 31 | [EnteredDateTime] [datetime] NOT NULL DEFAULT getdate(), 32 | CONSTRAINT [PK_PackageVersion] PRIMARY KEY CLUSTERED ( 33 | [PackageVersionID] ASC 34 | )) 35 | GO 36 | ALTER TABLE [dbo].[PackageVersion] WITH CHECK ADD CONSTRAINT [FK_PackageVersion_Package] FOREIGN KEY([PackageID]) 37 | REFERENCES [dbo].[Package] ([PackageID]) 38 | GO 39 | ALTER TABLE [dbo].[PackageVersion] CHECK CONSTRAINT [FK_PackageVersion_Package] 40 | GO 41 | 42 | CREATE TABLE [dbo].[BatchLog]( 43 | [BatchLogID] [int] IDENTITY(1,1) NOT NULL, 44 | [StartDateTime] [datetime] NOT NULL DEFAULT getdate(), 45 | [EndDateTime] [datetime] NULL, 46 | [Status] [char](1) NOT NULL, 47 | CONSTRAINT [PK_BatchLog] PRIMARY KEY CLUSTERED ( 48 | [BatchLogID] ASC 49 | )) 50 | GO 51 | 52 | CREATE TABLE [dbo].[PackageLog]( 53 | [PackageLogID] [int] IDENTITY(1,1) NOT NULL, 54 | [BatchLogID] [int] NOT NULL, 55 | [PackageVersionID] [int] NOT NULL, 56 | [ExecutionInstanceID] [uniqueidentifier] NOT NULL, 57 | [MachineName] [varchar](64) NOT NULL, 58 | [UserName] [varchar](64) NOT NULL, 59 | [StartDateTime] [datetime] NOT NULL DEFAULT getdate(), 60 | [EndDateTime] [datetime] NULL, 61 | [Status] [char](1) NOT NULL, 62 | CONSTRAINT [PK_PackageLog] PRIMARY KEY CLUSTERED ( 63 | [PackageLogID] ASC 64 | )) 65 | GO 66 | ALTER TABLE [dbo].[PackageLog] WITH CHECK ADD CONSTRAINT [FK_PackageLog_BatchLog] FOREIGN KEY([BatchLogID]) 67 | REFERENCES [dbo].[BatchLog] ([BatchLogID]) 68 | GO 69 | ALTER TABLE [dbo].[PackageLog] CHECK CONSTRAINT [FK_PackageLog_BatchLog] 70 | GO 71 | ALTER TABLE [dbo].[PackageLog] WITH CHECK ADD CONSTRAINT [FK_PackageLog_PackageVersion] FOREIGN KEY([PackageVersionID]) 72 | REFERENCES [dbo].[PackageVersion] ([PackageVersionID]) 73 | GO 74 | ALTER TABLE [dbo].[PackageLog] CHECK CONSTRAINT [FK_PackageLog_PackageVersion] 75 | GO 76 | 77 | CREATE TABLE [dbo].[PackageErrorLog]( 78 | [PackageErrorLogID] [int] IDENTITY(1,1) NOT NULL, 79 | [PackageLogID] [int] NOT NULL, 80 | [SourceName] [varchar](64) NOT NULL, 81 | [SourceID] [uniqueidentifier] NOT NULL, 82 | [ErrorCode] [int] NULL, 83 | [ErrorDescription] [varchar](2000) NULL, 84 | [LogDateTime] [datetime] NOT NULL, 85 | CONSTRAINT [PK_PackageErrorLog] PRIMARY KEY CLUSTERED ( 86 | [PackageErrorLogID] ASC 87 | )) 88 | GO 89 | ALTER TABLE [dbo].[PackageErrorLog] WITH CHECK ADD CONSTRAINT [FK_PackageErrorLog_PackageLog] FOREIGN KEY([PackageLogID]) 90 | REFERENCES [dbo].[PackageLog] ([PackageLogID]) 91 | GO 92 | ALTER TABLE [dbo].[PackageErrorLog] CHECK CONSTRAINT [FK_PackageErrorLog_PackageLog] 93 | GO 94 | 95 | CREATE TABLE [dbo].[PackageTaskLog]( 96 | [PackageTaskLogID] [int] IDENTITY(1,1) NOT NULL, 97 | [PackageLogID] [int] NOT NULL, 98 | [SourceName] 
[varchar](255) NOT NULL, 99 | [SourceID] [uniqueidentifier] NOT NULL, 100 | [StartDateTime] [datetime] NOT NULL, 101 | [EndDateTime] [datetime] NULL, 102 | CONSTRAINT [PK_PackageTaskLog] PRIMARY KEY CLUSTERED ( 103 | [PackageTaskLogID] ASC 104 | )) 105 | GO 106 | ALTER TABLE [dbo].[PackageTaskLog] WITH CHECK ADD CONSTRAINT [FK_PackageTaskLog_PackageLog] FOREIGN KEY([PackageLogID]) 107 | REFERENCES [dbo].[PackageLog] ([PackageLogID]) 108 | GO 109 | ALTER TABLE [dbo].[PackageTaskLog] CHECK CONSTRAINT [FK_PackageTaskLog_PackageLog] 110 | GO 111 | 112 | CREATE TABLE [dbo].[PackageVariableLog]( 113 | [PackageVariableLogID] [int] IDENTITY(1,1) NOT NULL, 114 | [PackageLogID] [int] NOT NULL, 115 | [VariableName] [varchar](255) NOT NULL, 116 | [VariableValue] [varchar](max) NOT NULL, 117 | [LogDateTime] [datetime] NOT NULL, 118 | CONSTRAINT [PK_PackageVariableLog] PRIMARY KEY CLUSTERED ( 119 | [PackageVariableLogID] ASC 120 | )) 121 | GO 122 | ALTER TABLE [dbo].[PackageVariableLog] WITH CHECK ADD CONSTRAINT [FK_PackageVariableLog_PackageLog] FOREIGN KEY([PackageLogID]) 123 | REFERENCES [dbo].[PackageLog] ([PackageLogID]) 124 | GO 125 | ALTER TABLE [dbo].[PackageVariableLog] CHECK CONSTRAINT [FK_PackageVariableLog_PackageLog] 126 | GO 127 | -------------------------------------------------------------------------------- /Framework Deployment Scripts/03 Framework Objects/02 CREATE TABLE TableLoadReport.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 20Oct20 4 | -- Description: This builds the table that holds volumetric data for table loads. 5 | -- Directions for use:Simply run the script. 6 | -- ============================================= 7 | 8 | USE [ODS] 9 | GO 10 | 11 | 12 | DROP TABLE IF EXISTS [rpt].[TableLoadReport] 13 | GO 14 | 15 | SET ANSI_NULLS ON 16 | GO 17 | 18 | SET QUOTED_IDENTIFIER ON 19 | GO 20 | 21 | IF NOT EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[rpt].[TableLoadReport]') AND type in (N'U')) 22 | BEGIN 23 | CREATE TABLE [rpt].[TableLoadReport]( 24 | [TableLoadReportID] [bigint] IDENTITY(1,1) NOT NULL, 25 | [TableName] [nvarchar](50) NULL, 26 | [DateOfCurrentObservation] [datetime] NULL, 27 | [DateOfLastObservation] [datetime] NULL, 28 | [RowCount] [int] NULL, 29 | [ChangeFromLastObservation] [int] NULL, 30 | [CurrentThreeSDLevel] [numeric](18, 4) NULL, 31 | [AsOf] [date] NULL, 32 | CONSTRAINT [PK_TableLoadReport] PRIMARY KEY CLUSTERED 33 | ( 34 | [TableLoadReportID] ASC 35 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] 36 | ) ON [PRIMARY] 37 | END 38 | GO 39 | 40 | 41 | -------------------------------------------------------------------------------- /Framework Deployment Scripts/03 Framework Objects/03 Create ETL Framework Stored Procs.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 20Oct20 4 | -- Description: These are the stored procedures that SSIS uses to log activity. 5 | -- Note that they do not conform to the naming convention for stored procedures. 6 | -- Directions for use:Simply run the script. 
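-- Example call sequence for one package run (all names, GUIDs, dates, and IDs are illustrative):
--   DECLARE @Audit TABLE (BatchLogID int, PackageLogID int, EndBatchAudit bit);
--   INSERT INTO @Audit
--   EXEC dbo.LogPackageStart
--        @BatchLogID = 0, @PackageName = 'ETLTemplate',
--        @ExecutionInstanceID = '11111111-2222-3333-4444-555555555555',
--        @MachineName = 'ETLSERVER01', @UserName = 'svc_etl',
--        @StartDateTime = '2020-10-20 06:00',
--        @PackageVersionGUID = '66666666-7777-8888-9999-000000000000',
--        @VersionMajor = 1, @VersionMinor = 0, @VersionBuild = 1,
--        @VersionComment = 'Initial build',
--        @PackageGUID = 'AAAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE',
--        @CreationDate = '2020-10-20', @CreatedBy = 'Developer';
--   -- LogPackageStart returns BatchLogID, PackageLogID, and EndBatchAudit; the package carries
--   -- those values forward to LogPackageEnd on success or LogPackageError on failure, e.g.:
--   -- EXEC dbo.LogPackageEnd @PackageLogID = 1, @BatchLogID = 1, @EndBatchAudit = 1;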
7 | -- ============================================= 8 | 9 | USE [SSISManagement] 10 | GO 11 | 12 | DROP PROCEDURE IF EXISTS [dbo].[LogPackageStart] 13 | DROP PROCEDURE IF EXISTS [dbo].[LogPackageEnd] 14 | DROP PROCEDURE IF EXISTS [dbo].[LogPackageError] 15 | DROP PROCEDURE IF EXISTS [dbo].[LogTaskPreExecute] 16 | DROP PROCEDURE IF EXISTS [dbo].[LogTaskPostExecute] 17 | DROP PROCEDURE IF EXISTS [dbo].[LogVariableValueChanged] 18 | GO 19 | 20 | CREATE PROCEDURE [dbo].[LogPackageStart] 21 | ( @BatchLogID int 22 | ,@PackageName varchar(255) 23 | ,@ExecutionInstanceID uniqueidentifier 24 | ,@MachineName varchar(64) 25 | ,@UserName varchar(64) 26 | ,@StartDateTime datetime 27 | ,@PackageVersionGUID uniqueidentifier 28 | ,@VersionMajor int 29 | ,@VersionMinor int 30 | ,@VersionBuild int 31 | ,@VersionComment varchar(1000) 32 | ,@PackageGUID uniqueidentifier 33 | ,@CreationDate datetime 34 | ,@CreatedBy varchar(255) 35 | ) 36 | 37 | AS 38 | BEGIN 39 | SET NOCOUNT ON 40 | 41 | DECLARE @PackageID int 42 | ,@PackageVersionID int 43 | ,@PackageLogID int 44 | ,@EndBatchAudit bit 45 | 46 | /* Initialize Variables */ 47 | SELECT @EndBatchAudit = 0 48 | 49 | /* Get Package Metadata ID */ 50 | IF NOT EXISTS (SELECT 1 FROM dbo.Package WHERE PackageGUID = @PackageGUID AND PackageName = @PackageName) 51 | Begin 52 | INSERT INTO dbo.Package (PackageGUID, PackageName, CreationDate, CreatedBy) 53 | VALUES (@PackageGUID, @PackageName, @CreationDate, @CreatedBy) 54 | End 55 | 56 | SELECT @PackageID = PackageID 57 | FROM dbo.Package 58 | WHERE PackageGUID = @PackageGUID 59 | AND PackageName = @PackageName 60 | 61 | /* Get Package Version MetaData ID */ 62 | IF NOT EXISTS (SELECT 1 FROM dbo.PackageVersion WHERE PackageVersionGUID = @PackageVersionGUID) 63 | Begin 64 | INSERT INTO dbo.PackageVersion (PackageID, PackageVersionGUID, VersionMajor, VersionMinor, VersionBuild, VersionComment) 65 | VALUES (@PackageID, @PackageVersionGUID, @VersionMajor, @VersionMinor, @VersionBuild, @VersionComment) 66 | End 67 | SELECT @PackageVersionID = PackageVersionID 68 | FROM dbo.PackageVersion 69 | WHERE PackageVersionGUID = @PackageVersionGUID 70 | 71 | /* Get BatchLogID */ 72 | IF ISNULL(@BatchLogID,0) = 0 73 | Begin 74 | INSERT INTO dbo.BatchLog (StartDateTime, [Status]) 75 | VALUES (@StartDateTime, 'R') 76 | SELECT @BatchLogID = SCOPE_IDENTITY() 77 | SELECT @EndBatchAudit = 1 78 | End 79 | 80 | /* Create PackageLog Record */ 81 | INSERT INTO dbo.PackageLog (BatchLogID, PackageVersionID, ExecutionInstanceID, MachineName, UserName, StartDateTime, [Status]) 82 | VALUES(@BatchLogID, @PackageVersionID, @ExecutionInstanceID, @MachineName, @UserName, @StartDateTime, 'R') 83 | 84 | SELECT @PackageLogID = SCOPE_IDENTITY() 85 | 86 | SELECT @BatchLogID as BatchLogID, @PackageLogID as PackageLogID, @EndBatchAudit as EndBatchAudit 87 | 88 | END 89 | GO 90 | 91 | CREATE PROCEDURE [dbo].[LogPackageEnd] 92 | ( @PackageLogID int 93 | ,@BatchLogID int 94 | ,@EndBatchAudit bit 95 | ) 96 | 97 | AS 98 | BEGIN 99 | SET NOCOUNT ON 100 | UPDATE dbo.PackageLog 101 | SET Status = 'S' 102 | , EndDateTime = getdate() 103 | WHERE PackageLogID = @PackageLogID 104 | 105 | IF @EndBatchAudit = 1 106 | Begin 107 | UPDATE dbo.BatchLog 108 | SET Status = 'S' 109 | , EndDateTime = getdate() 110 | WHERE BatchLogID = @BatchLogID 111 | End 112 | END 113 | GO 114 | 115 | CREATE PROCEDURE [dbo].[LogPackageError] 116 | ( @PackageLogID int 117 | ,@BatchLogID int 118 | ,@SourceName varchar(64) 119 | ,@SourceID uniqueidentifier 120 | ,@ErrorCode int 121 | 
,@ErrorDescription varchar(2000) 122 | ,@EndBatchAudit bit 123 | ) 124 | 125 | AS 126 | BEGIN 127 | SET NOCOUNT ON 128 | INSERT INTO dbo.PackageErrorLog (PackageLogID, SourceName, SourceID, ErrorCode, ErrorDescription, LogDateTime) 129 | VALUES (@PackageLogID, @SourceName, @SourceID, @ErrorCode, @ErrorDescription, getdate()) 130 | 131 | UPDATE dbo.PackageLog 132 | SET Status = 'F' 133 | WHERE PackageLogID = @PackageLogID 134 | 135 | IF @EndBatchAudit = 1 136 | Begin 137 | UPDATE dbo.BatchLog 138 | SET Status = 'F' 139 | , EndDateTime = getdate() 140 | WHERE BatchLogID = @BatchLogID 141 | End 142 | END 143 | GO 144 | 145 | CREATE PROCEDURE [dbo].[LogTaskPreExecute] 146 | ( @PackageLogID int 147 | ,@SourceName varchar(64) 148 | ,@SourceID uniqueidentifier 149 | ,@PackageID uniqueidentifier 150 | ) 151 | 152 | AS 153 | BEGIN 154 | SET NOCOUNT ON 155 | IF @PackageID <> @SourceID 156 | AND @SourceName <> 'SQL LogPackageStart' 157 | AND @SourceName <> 'SQL LogPackageEnd' 158 | INSERT INTO dbo.PackageTaskLog (PackageLogID, SourceName, SourceID, StartDateTime) 159 | VALUES (@PackageLogID, @SourceName, @SourceID, getdate()) 160 | END 161 | GO 162 | 163 | CREATE PROCEDURE [dbo].[LogTaskPostExecute] 164 | ( @PackageLogID int 165 | ,@SourceID uniqueidentifier 166 | ,@PackageID uniqueidentifier 167 | ) 168 | 169 | AS 170 | BEGIN 171 | SET NOCOUNT ON 172 | IF @PackageID <> @SourceID 173 | UPDATE dbo.PackageTaskLog 174 | SET EndDateTime = getdate() 175 | WHERE PackageLogID = @PackageLogID AND SourceID = @SourceID 176 | AND EndDateTime is null 177 | END 178 | GO 179 | 180 | CREATE PROCEDURE [dbo].[LogVariableValueChanged] 181 | ( @PackageLogID int 182 | ,@VariableName varchar(255) 183 | ,@VariableValue varchar(max) 184 | ) 185 | AS 186 | BEGIN 187 | SET NOCOUNT ON 188 | INSERT INTO dbo.PackageVariableLog(PackageLogID, VariableName, VariableValue, LogDateTime) 189 | VALUES (@PackageLogID, @VariableName, @VariableValue, getdate()) 190 | END 191 | GO 192 | -------------------------------------------------------------------------------- /Framework Deployment Scripts/03 Framework Objects/04 CREATE PROC usp_ErrorLogByPackage.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 28May14 4 | -- Description: This allows you to see the performance of a 5 | -- specific package by passing in the package name as a parameter. 6 | -- Directions for use: Pass in a package name without the extension. 
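-- Example (hypothetical run): EXEC dbo.usp_ErrorLogByPackage @PackageName = 'ETLTemplate';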
7 | -- ============================================= 8 | 9 | USE SSISManagement 10 | 11 | 12 | IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'ErrorLogByPackage') AND type in (N'P', N'PC')) 13 | DROP PROCEDURE IF EXISTS usp_ErrorLogByPackage 14 | GO 15 | 16 | CREATE PROCEDURE usp_ErrorLogByPackage (@PackageName VARCHAR(MAX)) 17 | AS 18 | BEGIN 19 | SET NOCOUNT ON 20 | 21 | 22 | SELECT BL.BatchLogID AS [Batch Log ID], 23 | P.PackageName AS [Package Name], 24 | PEL.SourceName AS [Task Name], 25 | PEL.ErrorDescription AS [Error Description], 26 | PEL.LogDateTime AS [Log Date Time] 27 | FROM PackageErrorLog PEL 28 | JOIN PackageLog PL 29 | ON PEL.PackageLogID = PL.PackageLogID 30 | JOIN PackageVersion PV 31 | ON PL.PackageVersionID = PV.PackageVersionID 32 | JOIN Package P 33 | ON PV.PackageID = P.PackageID 34 | JOIN BatchLog BL 35 | ON PL.BatchLogID = BL.BatchLogID 36 | WHERE P.PackageName = @PackageName 37 | ORDER BY PEL.LogDateTime Desc 38 | 39 | END -------------------------------------------------------------------------------- /Framework Deployment Scripts/03 Framework Objects/05 CREATE PROC usp_BatchRunTime.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 28May14 4 | -- Description: Lets you know how long a batch ran for. 5 | -- Directions for use:Pass in a batch ID. 6 | -- ============================================= 7 | 8 | USE SSISManagement 9 | 10 | 11 | IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'BatchRunTime') AND type in (N'P', N'PC')) 12 | DROP PROCEDURE IF EXISTS usp_BatchRunTime 13 | GO 14 | 15 | CREATE PROCEDURE usp_BatchRunTime (@BatchID INT) 16 | AS 17 | BEGIN 18 | SET NOCOUNT ON 19 | 20 | 21 | SELECT 22 | BL.BatchLogID AS [Batch ID], 23 | CONVERT(TIME,BL.EndDateTime - BL.StartDateTime) AS [Run Time], 24 | [BL].[Status] 25 | FROM BatchLog BL 26 | 27 | WHERE BL.BatchLogID = @BatchID 28 | 29 | END -------------------------------------------------------------------------------- /Framework Deployment Scripts/03 Framework Objects/06 CREATE PROC usp_ErrorLogByBatch.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 28May14 4 | -- Description: Error log by batch 5 | -- Directions for use: Pass in a batch ID. 
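-- Example (illustrative batch ID): EXEC dbo.usp_ErrorLogByBatch @BatchID = 42;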
6 | -- ============================================= 7 | 8 | USE SSISManagement 9 | 10 | 11 | IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'ErrorLogByBatch') AND type in (N'P', N'PC')) 12 | DROP PROCEDURE IF EXISTS usp_ErrorLogByBatch 13 | GO 14 | 15 | CREATE PROCEDURE usp_ErrorLogByBatch (@BatchID INT) 16 | AS 17 | BEGIN 18 | SET NOCOUNT ON 19 | 20 | 21 | SELECT BL.BatchLogID AS [Batch Log ID], 22 | P.PackageName AS [Package Name], 23 | PEL.SourceName AS [Task Name], 24 | PEL.ErrorDescription AS [Error Description], 25 | PEL.LogDateTime AS [Log Date Time] 26 | FROM PackageErrorLog PEL 27 | JOIN PackageLog PL 28 | ON PEL.PackageLogID = PL.PackageLogID 29 | JOIN PackageVersion PV 30 | ON PL.PackageVersionID = PV.PackageVersionID 31 | JOIN Package P 32 | ON PV.PackageID = P.PackageID 33 | JOIN BatchLog BL 34 | ON PL.BatchLogID = BL.BatchLogID 35 | WHERE BL.BatchLogID = @BatchID 36 | ORDER BY PEL.LogDateTime Desc 37 | 38 | END -------------------------------------------------------------------------------- /Framework Deployment Scripts/03 Framework Objects/07 CREATE PROC usp_GetVariableValues.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 28May14 4 | -- Description: Find the variables and their values 5 | -- that are currently being used for package configuration. 6 | -- Directions for use: Pass in a package name without the file extension. 7 | -- ============================================= 8 | 9 | USE SSISManagement 10 | GO 11 | 12 | 13 | SET ANSI_NULLS ON 14 | GO 15 | 16 | SET QUOTED_IDENTIFIER ON 17 | GO 18 | 19 | DROP PROCEDURE IF EXISTS usp_GetVariableValues 20 | 21 | GO 22 | 23 | CREATE PROC usp_GetVariableValues @packageName NVARCHAR(255) 24 | AS 25 | SELECT VariableName, VariableValue 26 | FROM SSISConfigurations 27 | WHERE PackageName = @packageName 28 | 29 | 30 | -------------------------------------------------------------------------------- /Framework Deployment Scripts/03 Framework Objects/08 CREATE PROC PackageRunTime.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 28May14 4 | -- Description: Lets you know how long a package ran for 5 | -- Directions for use:Pass in a package name without the file extension. 
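-- Example (hypothetical run): EXEC dbo.usp_PackageRunTime @PackageName = 'ETLTemplate';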
6 | -- ============================================= 7 | 8 | USE SSISManagement 9 | 10 | 11 | IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'PackageRunTime') AND type in (N'P', N'PC')) 12 | DROP PROCEDURE IF EXISTS usp_PackageRunTime 13 | GO 14 | 15 | CREATE PROCEDURE usp_PackageRunTime (@PackageName VARCHAR(MAX)) 16 | AS 17 | BEGIN 18 | SET NOCOUNT ON 19 | 20 | 21 | SELECT 22 | pkg.PackageName, 23 | pkglog.StartDateTime AS [Package Start Time], 24 | CONVERT(TIME,pkglog.EndDateTime - pkglog.StartDateTime) AS [Run Time], 25 | CASE pkglog.Status 26 | WHEN 'S' THEN 'Success' 27 | WHEN 'F' THEN 'Fail' 28 | WHEN 'R' THEN 'Running' 29 | END AS [Package Status] 30 | FROM dbo.PackageLog pkglog 31 | JOIN dbo.PackageVersion pkgvers 32 | ON pkglog.PackageVersionID = pkgvers.PackageVersionID 33 | JOIN dbo.Package pkg 34 | ON pkgvers.PackageID = pkg.PackageID 35 | WHERE pkg.PackageName = @PackageName 36 | ORDER BY pkglog.StartDateTime DESC 37 | 38 | END -------------------------------------------------------------------------------- /Framework Deployment Scripts/03 Framework Objects/09 CREATE TABLE SSISConfigurations.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 20Oct20 4 | -- Description: Creates the table that holds the configuration values for SSIS packages. 5 | -- Directions for use: Simply run the script. 6 | -- ============================================= 7 | 8 | USE [SSISManagement] 9 | GO 10 | 11 | 12 | 13 | DROP TABLE IF EXISTS [dbo].[SSISConfigurations] 14 | GO 15 | 16 | 17 | SET ANSI_NULLS ON 18 | GO 19 | 20 | SET QUOTED_IDENTIFIER ON 21 | GO 22 | 23 | IF NOT EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[SSISConfigurations]') AND type in (N'U')) 24 | BEGIN 25 | CREATE TABLE [dbo].[SSISConfigurations]( 26 | [ProcessConfigurationID] [int] IDENTITY(1,1) NOT NULL, 27 | [PackageName] [nvarchar](255) NOT NULL, 28 | [VariableName] [nvarchar](255) NOT NULL, 29 | [VariableValue] [nvarchar](255) NOT NULL, 30 | CONSTRAINT [PK_ProcessConfiguration] PRIMARY KEY CLUSTERED 31 | ( 32 | [ProcessConfigurationID] ASC 33 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] 34 | ) ON [PRIMARY] 35 | END 36 | GO 37 | 38 | 39 | -------------------------------------------------------------------------------- /Framework Deployment Scripts/03 Framework Objects/10 CREATE TABLE Tables.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 7Dec20 4 | -- Description: Creates Tables table. 
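-- This table acts as the registry of tables tracked by the volumetric monitoring objects
-- (vol.LoadObservations, RowCountLog, ShowAdverseTableLoads). A hypothetical registration:
--   INSERT INTO vol.Tables (TableName, DateCreated) VALUES (N'FactSales', GETDATE());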
5 | -- Change Log: 6 | -- ============================================= 7 | 8 | USE [ODS] 9 | GO 10 | 11 | 12 | DROP TABLE IF EXISTS [vol].[Tables] 13 | GO 14 | 15 | 16 | SET ANSI_NULLS ON 17 | GO 18 | 19 | SET QUOTED_IDENTIFIER ON 20 | GO 21 | 22 | IF NOT EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[vol].[Tables]') AND type in (N'U')) 23 | BEGIN 24 | CREATE TABLE [vol].[Tables]( 25 | [TableID] [bigint] IDENTITY(1,1) NOT NULL, 26 | [TableName] [nvarchar](50) NOT NULL, 27 | [DateCreated] [datetime] NOT NULL, 28 | CONSTRAINT [PK_Tables] PRIMARY KEY CLUSTERED 29 | ( 30 | [TableID] ASC 31 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] 32 | ) ON [PRIMARY] 33 | END 34 | GO 35 | 36 | 37 | -------------------------------------------------------------------------------- /Framework Deployment Scripts/03 Framework Objects/11 CREATE TABLE LoadObservations.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 7Dec20 4 | -- Description: Creates the load obvservations table. 5 | -- Change Log: 6 | -- 15Oct20 Created doc bloc sample. - BW 7 | -- ============================================= 8 | 9 | USE [ODS] 10 | GO 11 | 12 | IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[vol].[LoadObservations]') AND type in (N'U')) 13 | ALTER TABLE [vol].[LoadObservations] DROP CONSTRAINT IF EXISTS [FK_LoadObservations_Tables] 14 | GO 15 | 16 | 17 | DROP TABLE IF EXISTS [vol].[LoadObservations] 18 | GO 19 | 20 | 21 | SET ANSI_NULLS ON 22 | GO 23 | 24 | SET QUOTED_IDENTIFIER ON 25 | GO 26 | 27 | IF NOT EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[vol].[LoadObservations]') AND type in (N'U')) 28 | BEGIN 29 | CREATE TABLE [vol].[LoadObservations]( 30 | [LoadObservationID] [bigint] IDENTITY(1,1) NOT NULL, 31 | [TableID] [bigint] NOT NULL, 32 | [DateOfCurrentObservation] [datetime] NULL, 33 | [DateOfLastObservation] [datetime] NULL, 34 | [RowCount] [bigint] NULL, 35 | [ChangeFromLastObservation] [bigint] NULL, 36 | [CurrentThreeSDLevel] [numeric](18, 4) NULL, 37 | CONSTRAINT [PK_LoadObservations] PRIMARY KEY CLUSTERED 38 | ( 39 | [LoadObservationID] ASC 40 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] 41 | ) ON [PRIMARY] 42 | END 43 | GO 44 | 45 | IF NOT EXISTS (SELECT * FROM sys.foreign_keys WHERE object_id = OBJECT_ID(N'[vol].[FK_LoadObservations_Tables]') AND parent_object_id = OBJECT_ID(N'[vol].[LoadObservations]')) 46 | ALTER TABLE [vol].[LoadObservations] WITH CHECK ADD CONSTRAINT [FK_LoadObservations_Tables] FOREIGN KEY([TableID]) 47 | REFERENCES [vol].[Tables] ([TableID]) 48 | GO 49 | 50 | IF EXISTS (SELECT * FROM sys.foreign_keys WHERE object_id = OBJECT_ID(N'[vol].[FK_LoadObservations_Tables]') AND parent_object_id = OBJECT_ID(N'[vol].[LoadObservations]')) 51 | ALTER TABLE [vol].[LoadObservations] CHECK CONSTRAINT [FK_LoadObservations_Tables] 52 | GO 53 | 54 | 55 | -------------------------------------------------------------------------------- /Framework Deployment Scripts/03 Framework Objects/12 CREATE VIEW RowCountLog.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 20Oct20 4 | -- Description: A simple log of 
the amount of records that get processed in each load. 5 | -- Directions for use: 6 | -- ============================================= 7 | 8 | USE [ODS] 9 | GO 10 | 11 | 12 | DROP VIEW IF EXISTS [dbo].[RowCountLog] 13 | GO 14 | 15 | 16 | SET ANSI_NULLS ON 17 | GO 18 | 19 | SET QUOTED_IDENTIFIER ON 20 | GO 21 | 22 | 23 | CREATE VIEW [dbo].[RowCountLog] 24 | AS 25 | 26 | SELECT 27 | [TableName], 28 | [DateOfCurrentObservation], 29 | [DateOfLastObservation], 30 | [RowCount], 31 | [ChangeFromLastObservation], 32 | [CurrentThreeSDLevel] 33 | FROM [vol].[Tables] t 34 | JOIN [vol].[LoadObservations] lo 35 | ON t.TableID = lo.TableID 36 | GO 37 | 38 | 39 | -------------------------------------------------------------------------------- /Framework Deployment Scripts/03 Framework Objects/13 CREATE VIEW ShowAdverseTableLoads.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 20Oct20 4 | -- Description: Show tables with anomalous data loads. 5 | -- Directions for use: 6 | -- ============================================= 7 | USE [ODS] 8 | GO 9 | 10 | 11 | DROP VIEW IF EXISTS [dbo].[ShowAdverseTableLoads] 12 | GO 13 | 14 | 15 | SET ANSI_NULLS ON 16 | GO 17 | 18 | SET QUOTED_IDENTIFIER ON 19 | GO 20 | 21 | 22 | 23 | CREATE VIEW [dbo].[ShowAdverseTableLoads] AS 24 | 25 | --Find Adverse Loads 26 | SELECT t.TableID, t.TableName, lo.[RowCount] AS [Records In Table], lo.ChangeFromLastObservation AS [Records Loaded], lo.CurrentThreeSDLevel AS [Current Three Standard Deviation Level] 27 | FROM ODS.[vol].[Tables] t 28 | JOIN ODS.[vol].[LoadObservations] lo 29 | ON t.TableID = lo.TableID 30 | WHERE t.TableID NOT IN( 31 | --Get rid of tables with no records 32 | SELECT t.TableID 33 | FROM ODS.[vol].[Tables] t 34 | JOIN ODS.[vol].[LoadObservations] lo 35 | ON t.TableID = lo.TableID 36 | GROUP BY t.TableID 37 | HAVING AVG(lo.[RowCount]) = 0 38 | UNION 39 | --Get rid of tables with no load variance 40 | SELECT t.TableID 41 | FROM ODS.[vol].[Tables] t 42 | JOIN ODS.[vol].[LoadObservations] lo 43 | ON t.TableID = lo.TableID 44 | GROUP BY t.TableID 45 | HAVING AVG(lo.CurrentThreeSDLevel) = 0 46 | ) 47 | AND lo.DateOfCurrentObservation = (SELECT MAX(DateOfCurrentObservation) FROM ODS.[vol].[LoadObservations]) 48 | AND lo.ChangeFromLastObservation > lo.CurrentThreeSDLevel 49 | 50 | 51 | GO 52 | 53 | 54 | -------------------------------------------------------------------------------- /Framework Deployment Scripts/03 Framework Objects/14 CREATE VIEW SSISErrorLogByPackage.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 20Oct20 4 | -- Description: SSIS Errors By Package 5 | -- Directions for use: 6 | -- ============================================= 7 | 8 | USE [ODS] 9 | GO 10 | 11 | 12 | DROP VIEW IF EXISTS [dbo].[SSISErrorLogByPackage] 13 | GO 14 | 15 | 16 | SET ANSI_NULLS ON 17 | GO 18 | 19 | SET QUOTED_IDENTIFIER ON 20 | GO 21 | 22 | 23 | 24 | 25 | CREATE VIEW [dbo].[SSISErrorLogByPackage] AS 26 | 27 | SELECT BL.BatchLogID AS [Batch Log ID], 28 | P.PackageName AS [Package Name], 29 | PEL.SourceName AS [Task Name], 30 | PEL.ErrorDescription AS [Error Description], 31 | PEL.LogDateTime AS [Log Date Time] 32 | FROM [SSISManagement]..PackageErrorLog PEL 33 | JOIN [SSISManagement]..PackageLog PL 34 | ON PEL.PackageLogID = PL.PackageLogID 35 | JOIN 
[SSISManagement]..PackageVersion PV 36 | ON PL.PackageVersionID = PV.PackageVersionID 37 | JOIN [SSISManagement]..Package P 38 | ON PV.PackageID = P.PackageID 39 | JOIN [SSISManagement]..BatchLog BL 40 | ON PL.BatchLogID = BL.BatchLogID 41 | 42 | GO 43 | 44 | 45 | -------------------------------------------------------------------------------- /Framework Deployment Scripts/03 Framework Objects/15 CREATE VIEW SSISPackageBatchRunTime.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 20Oct20 4 | -- Description: SSIS Run times by batch. 5 | -- Directions for use: 6 | -- ============================================= 7 | 8 | USE [ODS] 9 | GO 10 | 11 | 12 | DROP VIEW IF EXISTS [dbo].[SSISPackageBatchRunTime] 13 | GO 14 | 15 | 16 | SET ANSI_NULLS ON 17 | GO 18 | 19 | SET QUOTED_IDENTIFIER ON 20 | GO 21 | 22 | 23 | 24 | CREATE VIEW [dbo].[SSISPackageBatchRunTime] AS 25 | 26 | SELECT 27 | BL.BatchLogID AS [Batch ID], 28 | CONVERT(TIME,BL.EndDateTime - BL.StartDateTime) AS [Run Time], 29 | CASE [BL].[Status] 30 | WHEN 'S' THEN 'Success' 31 | WHEN 'F' THEN 'Fail' 32 | WHEN 'R' THEN 'Running' 33 | END AS [Batch Status] 34 | FROM [SSISManagement]..BatchLog BL 35 | 36 | GO 37 | 38 | 39 | -------------------------------------------------------------------------------- /Framework Deployment Scripts/03 Framework Objects/16 CREATE VIEW SSISPackageRunTime.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 20Oct20 4 | -- Description: See a log of package run times. 5 | -- Directions for use: 6 | -- ============================================= 7 | 8 | USE [ODS] 9 | GO 10 | 11 | 12 | DROP VIEW IF EXISTS [dbo].[SSISPackageRunTime] 13 | GO 14 | 15 | 16 | SET ANSI_NULLS ON 17 | GO 18 | 19 | SET QUOTED_IDENTIFIER ON 20 | GO 21 | 22 | CREATE VIEW [dbo].[SSISPackageRunTime] AS 23 | 24 | SELECT 25 | pkg.PackageName, 26 | pkglog.StartDateTime AS [Package Start Time], 27 | CONVERT(TIME,pkglog.EndDateTime - pkglog.StartDateTime) AS [Run Time], 28 | CASE pkglog.Status 29 | WHEN 'S' THEN 'Success' 30 | WHEN 'F' THEN 'Fail' 31 | WHEN 'R' THEN 'Running' 32 | END AS [Package Status] 33 | FROM SSISManagement..PackageLog pkglog 34 | JOIN SSISManagement..PackageVersion pkgvers 35 | ON pkglog.PackageVersionID = pkgvers.PackageVersionID 36 | JOIN SSISManagement..Package pkg 37 | ON pkgvers.PackageID = pkg.PackageID 38 | 39 | GO 40 | 41 | 42 | -------------------------------------------------------------------------------- /Framework Deployment Scripts/03 Framework Objects/17 CREATE FUNCTION udf_CleanDate.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 17Jun18 4 | -- Description: Clean dates to conform with DimDateCK 5 | -- ============================================= 6 | 7 | USE ODS 8 | 9 | GO 10 | 11 | DROP FUNCTION IF EXISTS udf_CleanDate 12 | 13 | GO 14 | 15 | CREATE FUNCTION udf_CleanDate(@DATE NVARCHAR(255)) 16 | 17 | RETURNS NCHAR(8) 18 | 19 | AS 20 | 21 | BEGIN 22 | 23 | DECLARE @MinDate DATE 24 | DECLARE @MaxDate DATE 25 | DECLARE @EmptyRecordCode BIGINT = 00000000 26 | DECLARE @ErrorDateCode BIGINT = 11111111 27 | 28 | --Find and replace YourDatabase.YourSchema.DimDate with the name of your data warehouse 29 | 
--and the schema that it lives in. 30 | 31 | SELECT @MaxDate = CAST(MAX(DateCK) AS NCHAR(8)) 32 | FROM YourDataWarehouse.dw.DimDate 33 | WHERE DateCK NOT IN (@EmptyRecordCode,@ErrorDateCode) 34 | 35 | SELECT @MinDate = CAST(MIN(DateCK) AS NCHAR(8)) 36 | FROM YourDataWarehouse.dw.DimDate 37 | WHERE DateCK NOT IN (@EmptyRecordCode,@ErrorDateCode) 38 | 39 | 40 | --Set empty dates to the empty field code 41 | --This HAS to be done before you check for 42 | --bad dates. Then Set error dates to the error field code. 43 | --Finally, clean up the dates to conform to DimDateCK format. 44 | 45 | RETURN 46 | CASE 47 | WHEN @Date IS NULL THEN @EmptyRecordCode 48 | WHEN ISDATE(@Date) = 0 THEN @ErrorDateCode 49 | WHEN @Date < @MinDate THEN @ErrorDateCode 50 | WHEN @Date > @MaxDate THEN @ErrorDateCode 51 | ELSE CONVERT(VARCHAR(10),CAST(@Date AS DATE),112) 52 | END 53 | 54 | 55 | END -------------------------------------------------------------------------------- /Framework Deployment Scripts/04 Python Package Installation/InstallFrameworkPackages.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import subprocess 3 | 4 | FailedInstalls = {} 5 | Packages = ['fuzzywuzzy','python-Levenshtein','pyunpack','pyodbc'] 6 | 7 | for package in Packages: 8 | try: 9 | subprocess.check_call([sys.executable, '-m', 'pip', 'install', package]) 10 | except Exception as e: 11 | FailedInstalls[package] = e 12 | 13 | for PackageName, ErrorMessage in FailedInstalls.items(): 14 | print('Package ' + PackageName + ' failed to install because ' + str(ErrorMessage)) 15 | 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Bob Wakefield 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mass Street Analytics Data Warehouse ETL Framework 2 | This is a framework for moving data into and out of an enterprise data warehouse (EDW). It includes everything you need to set up a self-service data analytics platform. 3 | Documentation can be found [here](https://etl-framework.massstreetuniversity.com/data-warehouse-etl-framework/). 
4 | -------------------------------------------------------------------------------- /Rapid Development Tools/data model creation tool.xlsm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Rapid Development Tools/data model creation tool.xlsm -------------------------------------------------------------------------------- /Sample Directory Structures/Code Repo/Python/global_config/test_config.yaml: -------------------------------------------------------------------------------- 1 | dns: 'DSN=ETL;' 2 | remote_root_directory: 'C:/' 3 | from_address: 'from email address of alert system' 4 | to_address: 'to email address of alert system' 5 | password: 'smtp server password' 6 | smtp_server: 'smtp server URL' 7 | smtp_port: smtp port value -------------------------------------------------------------------------------- /Sample Directory Structures/Code Repo/Python/process_name/config/test_config.yaml: -------------------------------------------------------------------------------- 1 | process_specific_config: 'ProcessSpecificConfigValueForTesting' -------------------------------------------------------------------------------- /Sample Directory Structures/Code Repo/SQL/Support Objects/Functions/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Code Repo/SQL/Support Objects/Functions/.gitkeep -------------------------------------------------------------------------------- /Sample Directory Structures/Code Repo/SQL/Support Objects/Views/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Code Repo/SQL/Support Objects/Views/.gitkeep -------------------------------------------------------------------------------- /Sample Directory Structures/Code Repo/SQL/Warehouse Load/01 Pull Data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Code Repo/SQL/Warehouse Load/01 Pull Data/.gitkeep -------------------------------------------------------------------------------- /Sample Directory Structures/Code Repo/SQL/Warehouse Load/02 Clean Data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Code Repo/SQL/Warehouse Load/02 Clean Data/.gitkeep -------------------------------------------------------------------------------- /Sample Directory Structures/Code Repo/SQL/Warehouse Load/03 Process Dimensions/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Code Repo/SQL/Warehouse Load/03 Process Dimensions/.gitkeep -------------------------------------------------------------------------------- /Sample Directory Structures/Code Repo/SQL/Warehouse Load/04 Process Facts/.gitkeep: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Code Repo/SQL/Warehouse Load/04 Process Facts/.gitkeep -------------------------------------------------------------------------------- /Sample Directory Structures/Code Repo/SQL/Warehouse Load/05 Finalize And Audit/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Code Repo/SQL/Warehouse Load/05 Finalize And Audit/.gitkeep -------------------------------------------------------------------------------- /Sample Directory Structures/Code Repo/SQL/Warehouse Load/06 Populate Reporting Tables/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Code Repo/SQL/Warehouse Load/06 Populate Reporting Tables/.gitkeep -------------------------------------------------------------------------------- /Sample Directory Structures/Code Repo/SQL/Warehouse Load/07 Monitoring/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Code Repo/SQL/Warehouse Load/07 Monitoring/.gitkeep -------------------------------------------------------------------------------- /Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Business Unit Name/ETL Process Name/Archive/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Business Unit Name/ETL Process Name/Archive/.gitkeep -------------------------------------------------------------------------------- /Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Business Unit Name/ETL Process Name/In/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Business Unit Name/ETL Process Name/In/.gitkeep -------------------------------------------------------------------------------- /Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Business Unit Name/ETL Process Name/Out/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Business Unit Name/ETL Process Name/Out/.gitkeep -------------------------------------------------------------------------------- /Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Third Party Name/ETL Process Name/Archive/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Third Party Name/ETL Process Name/Archive/.gitkeep -------------------------------------------------------------------------------- /Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Third Party Name/ETL Process Name/In/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Third Party Name/ETL Process Name/In/.gitkeep -------------------------------------------------------------------------------- /Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Third Party Name/ETL Process Name/Out/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/File Processing And Storage/InterfaceAndExtractFiles/Third Party Name/ETL Process Name/Out/.gitkeep -------------------------------------------------------------------------------- /Sample Directory Structures/Warehouse File IO/FileExchange/Business Unit Name/ETL Process Name/In/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Warehouse File IO/FileExchange/Business Unit Name/ETL Process Name/In/.gitkeep -------------------------------------------------------------------------------- /Sample Directory Structures/Warehouse File IO/FileExchange/Business Unit Name/ETL Process Name/Out/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Warehouse File IO/FileExchange/Business Unit Name/ETL Process Name/Out/.gitkeep -------------------------------------------------------------------------------- /Sample Directory Structures/Warehouse File IO/FileExchange/Third Party Name/ETL Process Name/In/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Warehouse File IO/FileExchange/Third Party Name/ETL Process Name/In/.gitkeep -------------------------------------------------------------------------------- /Sample Directory Structures/Warehouse File IO/FileExchange/Third Party Name/ETL Process Name/Out/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MassStreetAnalytics/etl-framework/214f5ecbdb6fd75f4c944d1fbee1ba4b83e9a308/Sample Directory Structures/Warehouse File IO/FileExchange/Third Party Name/ETL Process Name/Out/.gitkeep -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/01 Finalize And Audit Scripts/18 CREATE PROC usp_RecordRowCounts.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | 
-- Author: Bob Wakefield 3 | -- Create date: 15Oct17 4 | -- Description: Records how many records each fact table is getting loaded with. 5 | --Change Log: 6 | --21Aug21 Updated for use with SQL Server 2019 BW 7 | -- ============================================= 8 | USE ODS 9 | 10 | DROP PROCEDURE IF EXISTS usp_RecordRowCounts 11 | 12 | GO 13 | 14 | CREATE PROCEDURE usp_RecordRowCounts AS 15 | BEGIN 16 | 17 | 18 | -- 19 | BEGIN TRANSACTION 20 | 21 | DROP TABLE IF EXISTS #counts 22 | DROP TABLE IF EXISTS #DateOfLastObservations 23 | 24 | 25 | CREATE TABLE #counts(TableName nvarchar(255), [RowCount] int) 26 | CREATE TABLE #DateOfLastObservations(TableID INT, [RowCount] INT, DateOfLastObservation DATETIME) 27 | 28 | --Change [YourDataWarehouse] to the name of your EDW database. 29 | EXEC [DVDRentalDW]..sp_MSforeachtable @command1='INSERT #counts (TableName, [RowCount]) SELECT REPLACE(SUBSTRING(''?'',8,LEN(''?'')),'']'',''''), COUNT(*) FROM ?' 30 | 31 | --SELECT * 32 | --FROM #counts 33 | 34 | 35 | MERGE vol.Tables as target 36 | USING( 37 | SELECT TableName, CURRENT_TIMESTAMP AS date_measured, [RowCount] 38 | FROM #counts 39 | ) AS source 40 | ON source.TableName COLLATE DATABASE_DEFAULT = target.TableName COLLATE DATABASE_DEFAULT 41 | 42 | WHEN NOT MATCHED THEN 43 | INSERT ([TableName],[DateCreated]) 44 | VALUES (source.TableName, CURRENT_TIMESTAMP); 45 | 46 | 47 | INSERT INTO #DateOfLastObservations(TableID,DateOfLastObservation) 48 | SELECT [TableID], MAX([DateOfCurrentObservation]) AS DateOfLastObservation 49 | FROM vol.LoadObservations 50 | GROUP BY [TableID] 51 | 52 | ; 53 | WITH previous_observations(DateOfLastObservation, last_RowCount, TableID) 54 | AS( 55 | SELECT lo.[DateOfCurrentObservation], lo.[RowCount], lo.TableID 56 | FROM vol.LoadObservations lo 57 | JOIN #DateOfLastObservations llo 58 | ON lo.[DateOfCurrentObservation] = llo.DateOfLastObservation 59 | AND lo.[TableID] = llo.TableID 60 | ), 61 | current_sd_level(TableID, current_three_sd_level) 62 | AS( 63 | SELECT TableID, STDEV(ChangeFromLastObservation) * 3 64 | FROM vol.LoadObservations 65 | GROUP BY TableID 66 | ) 67 | INSERT vol.[LoadObservations](TableID, DateOfCurrentObservation, DateOfLastObservation, [RowCount], ChangeFromLastObservation, [CurrentThreeSDLevel]) 68 | SELECT t.TableID, CURRENT_TIMESTAMP AS DateOfCurrentObservation, po.DateOfLastObservation, c.[RowCount], ABS(c.[RowCount] - po.last_RowCount), ABS(sd.current_three_sd_level) 69 | FROM #counts c 70 | JOIN vol.Tables t 71 | ON c.TableName COLLATE DATABASE_DEFAULT = t.TableName COLLATE DATABASE_DEFAULT 72 | LEFT OUTER JOIN previous_observations po 73 | ON t.TableID = po.TableID 74 | LEFT OUTER JOIN current_sd_level sd 75 | ON t.TableID = sd.TableID 76 | 77 | 78 | COMMIT TRANSACTION 79 | 80 | 81 | DROP TABLE #counts 82 | DROP TABLE #DateOfLastObservations 83 | 84 | END 85 | -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/01 Finalize And Audit Scripts/19 CREATE PROC usp_MarkRecordsAsProcessed.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 15Oct17 4 | -- Description: Mark records in stage tables as 5 | -- having been processsed. 6 | -- Every fact table load you build will need code 7 | -- added to the usp_MarkRecordsAsProcessed stored 8 | -- procedure. 
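-- The procedure body below shows two worked examples. A new fact table
-- load would add one more UPDATE following the same pattern (the object
-- names here are hypothetical placeholders, not framework objects):
--   UPDATE st
--   SET Processed = 1
--   FROM YourSchemaName.YourStageTable st
--   JOIN YourEDW..FactYourFactTable pt
--     ON pt.UniqueDims = st.UniqueDims
--   WHERE st.ErrorRecord = 0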
9 | -- ============================================= 10 | 11 | USE ODS 12 | GO 13 | 14 | DROP PROCEDURE IF EXISTS dbo.usp_MarkRecordsAsProcessed 15 | GO 16 | 17 | SET ANSI_NULLS ON 18 | GO 19 | 20 | SET QUOTED_IDENTIFIER ON 21 | GO 22 | 23 | CREATE PROCEDURE dbo.usp_MarkRecordsAsProcessed 24 | AS 25 | BEGIN 26 | 27 | 28 | UPDATE st 29 | SET Processed = 1 30 | FROM [eod].[Assets] st 31 | JOIN FSA..DimAssets pt 32 | ON pt.RowHash = st.UniqueRows 33 | WHERE st.ErrorRecord = 0 34 | 35 | UPDATE st 36 | SET Processed = 1 37 | FROM eod.Exchanges st 38 | JOIN FSA..FactExchangeCloseData pt 39 | ON pt.UniqueDims = st.UniqueDims 40 | WHERE st.ErrorRecord = 0 41 | 42 | 43 | 44 | END 45 | GO 46 | 47 | 48 | -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/01 Finalize And Audit Scripts/20 CREATE PROC usp_CheckForUnprocessedRecords.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 15Oct17 4 | -- Description: Check Stage Tables for unprocessed records. 5 | -- ============================================= 6 | 7 | 8 | USE ODS 9 | GO 10 | 11 | DROP PROCEDURE IF EXISTS dbo.usp_CheckForUnprocessedRecords 12 | GO 13 | 14 | SET ANSI_NULLS ON 15 | GO 16 | 17 | SET QUOTED_IDENTIFIER ON 18 | GO 19 | 20 | CREATE PROCEDURE dbo.usp_CheckForUnprocessedRecords 21 | AS 22 | BEGIN 23 | 24 | --Update with your operator name. 25 | DECLARE @OperatorName sysname = N'YourOperatorName'; 26 | 27 | DECLARE @OperatorEmailAddress NVARCHAR(100) = (SELECT email_address FROM msdb.dbo.sysoperators WHERE [name] = @OperatorName); 28 | 29 | 30 | --Check for unprocessed records 31 | SELECT * 32 | FROM [eod].[Assets] 33 | WHERE Processed = 0 34 | 35 | 36 | IF @@RowCount > 0 37 | BEGIN 38 | EXEC msdb.dbo.sp_send_dbmail 39 | @profile_name = 'Monitoring', 40 | @recipients = @OperatorEmailAddress, 41 | @subject = 'Unprocessed Records Exist', 42 | @body = 'There are unprocessed records in the Assets Staging Table' ; 43 | END 44 | 45 | SELECT * 46 | FROM [eod].[EODPrices] 47 | WHERE Processed = 0 48 | 49 | IF @@RowCount > 0 50 | BEGIN 51 | EXEC msdb.dbo.sp_send_dbmail 52 | @profile_name = 'Monitoring', 53 | @recipients = @OperatorEmailAddress, 54 | @subject = 'Unprocessed Records Exist', 55 | @body = 'There are unprocessed records in the EOD Prices Staging Table' ; 56 | END 57 | 58 | SELECT * 59 | FROM [eod].[Exchanges] 60 | WHERE Processed = 0 61 | 62 | 63 | IF @@RowCount > 0 64 | BEGIN 65 | EXEC msdb.dbo.sp_send_dbmail 66 | @profile_name = 'Monitoring', 67 | @recipients = @OperatorEmailAddress, 68 | @subject = 'Unprocessed Records Exist', 69 | @body = 'There are unprocessed records in the Exchanges staging table.' ; 70 | END 71 | 72 | 73 | END 74 | GO 75 | -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/02 Monitoring Scripts/21 CREATE PROC usp_DisplayTablesNotLoading.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 11April16 4 | -- Description: Displays tables that are not loading. 5 | -- Unwanted tables are tables that don't load that often by design. 6 | -- 10Mar17: Updated to take into account larger record counts 7 | -- Int datatype was too small. 
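-- A table is reported as "not loading" when the average of
-- ChangeFromLastObservation over the trailing three days is zero,
-- after excluding empty tables and tables with no load variance.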
8 | -- ============================================= 9 | 10 | USE [ODS] 11 | GO 12 | 13 | 14 | DROP PROCEDURE IF EXISTS [dbo].[usp_DisplayTablesNotLoading] 15 | GO 16 | 17 | SET ANSI_NULLS ON 18 | GO 19 | 20 | SET QUOTED_IDENTIFIER ON 21 | GO 22 | 23 | CREATE PROCEDURE [dbo].[usp_DisplayTablesNotLoading] AS 24 | BEGIN 25 | 26 | CREATE TABLE #UnWantedTables(TableID BIGINT) 27 | CREATE CLUSTERED INDEX CIDX_UNWANTEDTABLES_TABLEID ON #UnWantedTables(TableID) 28 | 29 | DECLARE @MostRecentDate DATETIME 30 | 31 | SELECT @MostRecentDate = MAX(DateOfCurrentObservation) FROM [vol].[LoadObservations] 32 | 33 | INSERT INTO #UnWantedTables(TableID) 34 | --Get rid of tables with no records 35 | SELECT t.TableID 36 | FROM [vol].[Tables] t 37 | JOIN [vol].[LoadObservations] lo 38 | ON t.TableID = lo.TableID 39 | GROUP BY t.TableID 40 | HAVING AVG(CAST(lo.[RowCount] AS BIGINT)) = 0 41 | UNION 42 | --Get rid of tables with no load variance 43 | SELECT t.TableID 44 | FROM [vol].[Tables] t 45 | JOIN [vol].[LoadObservations] lo 46 | ON t.TableID = lo.TableID 47 | GROUP BY t.TableID 48 | HAVING AVG(lo.CurrentThreeSDLevel) = 0 49 | 50 | --Manually get rid of unwanted tables. 51 | --INSERT INTO #UnWantedTables(TableID) 52 | --SELECT 131 53 | 54 | 55 | 56 | SELECT TableID, [TableName] 57 | FROM [vol].[Tables] 58 | WHERE TableID IN ( 59 | SELECT t.TableID 60 | FROM [vol].[Tables] t 61 | JOIN [vol].[LoadObservations] lo 62 | ON t.TableID = lo.TableID 63 | WHERE t.TableID NOT IN (SELECT TableID FROM #UnWantedTables) 64 | AND lo.DateOfCurrentObservation BETWEEN DATEADD(DD,-3,CURRENT_TIMESTAMP) AND CURRENT_TIMESTAMP 65 | GROUP BY t.TableID 66 | HAVING AVG(CAST(lo.ChangeFromLastObservation AS FLOAT)) = 0 67 | ) 68 | 69 | DROP TABLE #UnWantedTables 70 | 71 | END 72 | 73 | 74 | 75 | 76 | GO 77 | 78 | 79 | -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/02 Monitoring Scripts/22 CREATE PROC usp_LoadTableLoadReportingTable.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 7April16 4 | -- Description: Loads table load reporting table 5 | -- 23Dec16: Updated to take into account larger record counts 6 | -- Int datatype was too small. 
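-- Rows are only inserted when the most recent DateOfCurrentObservation
-- already recorded in rpt.TableLoadReport is not today's date, so
-- rerunning the procedure within the same day inserts nothing.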
7 | -- ============================================= 8 | 9 | USE [ODS] 10 | GO 11 | 12 | 13 | DROP PROCEDURE IF EXISTS [dbo].[usp_LoadTableLoadReportingTable] 14 | GO 15 | 16 | 17 | SET ANSI_NULLS ON 18 | GO 19 | 20 | SET QUOTED_IDENTIFIER ON 21 | GO 22 | 23 | 24 | CREATE PROCEDURE [dbo].[usp_LoadTableLoadReportingTable] AS 25 | BEGIN 26 | 27 | CREATE TABLE #UnWantedTables(TableID INT) 28 | CREATE CLUSTERED INDEX CIDX_UNWANTEDTABLES_TABLEID ON #UnWantedTables(TableID) 29 | 30 | DECLARE @MostRecentDate DATETIME 31 | DECLARE @MostRecentDateInReportingTable DATE 32 | 33 | 34 | SELECT @MostRecentDate = MAX(DateOfCurrentObservation) FROM [vol].[LoadObservations] 35 | SELECT @MostRecentDateInReportingTable = MAX(DateOfCurrentObservation) FROM rpt.TableLoadReport 36 | 37 | INSERT INTO #UnWantedTables(TableID) 38 | --Get rid of tables with no records 39 | SELECT t.TableID 40 | FROM [vol].[Tables] t 41 | JOIN [vol].[LoadObservations] lo 42 | ON t.TableID = lo.TableID 43 | GROUP BY t.TableID 44 | HAVING AVG(CAST(lo.[RowCount] AS BIGINT)) = 0 45 | UNION 46 | --Get rid of tables with no load variance 47 | SELECT t.TableID 48 | FROM [vol].[Tables] t 49 | JOIN [vol].[LoadObservations] lo 50 | ON t.TableID = lo.TableID 51 | GROUP BY t.TableID 52 | HAVING AVG(CAST(lo.CurrentThreeSDLevel AS BIGINT)) = 0 53 | 54 | --Remove Rarely loaded tables from load analysis 55 | --INSERT INTO #UnWantedTables(TableID) 56 | --SELECT 131 57 | --UNION ALL 58 | --SELECT 135 59 | --UNION ALL 60 | --SELECT 118 61 | --UNION ALL 62 | --SELECT 123 63 | 64 | 65 | IF ISNULL(@MostRecentDateInReportingTable,'19000101') <> CAST(CURRENT_TIMESTAMP AS DATE) 66 | BEGIN 67 | INSERT INTO rpt.TableLoadReport( 68 | TableName, 69 | DateOfCurrentObservation, 70 | DateOfLastObservation, 71 | [RowCount], 72 | ChangeFromLastObservation, 73 | CurrentThreeSDLevel, 74 | AsOf 75 | ) 76 | SELECT 77 | t.TableName, 78 | lo.DateOfCurrentObservation, 79 | lo.DateOfLastObservation, 80 | lo.[RowCount], 81 | lo.ChangeFromLastObservation, 82 | lo.CurrentThreeSDLevel, 83 | CAST(CURRENT_TIMESTAMP AS DATE) 84 | FROM vol.Tables t 85 | JOIN vol.LoadObservations lo 86 | ON t.TableID = lo.TableID 87 | WHERE lo.CurrentThreeSDLevel IS NOT NULL 88 | AND t.TableID NOT IN (SELECT TableId FROM #UnWantedTables) 89 | ORDER BY t.TableName, lo.DateOfCurrentObservation ASC 90 | END 91 | 92 | DROP TABLE #UnWantedTables 93 | 94 | END 95 | GO 96 | 97 | 98 | -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/02 Monitoring Scripts/23 CREATE PROC usp_TableLoadMonitoring.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 7April16 4 | -- Description: Checks to see if anything weird is 5 | -- going on with the tables loads and sends an alert 6 | -- when something strange is found. 7 | -- Script looks for unusual load amounts 8 | -- and tables not being loaded. 9 | -- 21Dec16: Updated to take into account larger record counts 10 | -- Int datatype was too small. 11 | -- ============================================= 12 | 13 | USE [ODS] 14 | GO 15 | 16 | 17 | DROP PROCEDURE IF EXISTS [dbo].[usp_TableLoadMonitoring] 18 | GO 19 | 20 | 21 | SET ANSI_NULLS ON 22 | GO 23 | 24 | SET QUOTED_IDENTIFIER ON 25 | GO 26 | 27 | CREATE PROCEDURE [dbo].[usp_TableLoadMonitoring] AS 28 | BEGIN 29 | 30 | --Update with your operator name. 
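--The operator must already exist in msdb.dbo.sysoperators with an
--e-mail address; otherwise @OperatorEmailAddress resolves to NULL and
--the sp_send_dbmail calls below will fail. For example (hypothetical
--operator name):
--  DECLARE @OperatorName sysname = N'ETLAlerts';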
31 | DECLARE @OperatorName sysname = N'YourOperatorName'; 32 | 33 | DECLARE @OperatorEmailAddress NVARCHAR(100) = (SELECT email_address FROM msdb.dbo.sysoperators WHERE [name] = @OperatorName); 34 | 35 | CREATE TABLE #UnWantedTables(TableID INT) 36 | CREATE CLUSTERED INDEX CIDX_UNWANTEDTABLES_TABLEID ON #UnWantedTables(TableID) 37 | 38 | DECLARE @MostRecentDate DATETIME 39 | 40 | SELECT @MostRecentDate = MAX(DateOfCurrentObservation) FROM [vol].[LoadObservations] 41 | 42 | INSERT INTO #UnWantedTables(TableID) 43 | --Get rid of tables with no records 44 | SELECT t.TableID 45 | FROM [vol].[Tables] t 46 | JOIN [vol].[LoadObservations] lo 47 | ON t.TableID = lo.TableID 48 | GROUP BY t.TableID 49 | HAVING AVG(CAST(lo.[RowCount] AS BIGINT)) = 0 50 | UNION 51 | --Get rid of tables with no load variance 52 | SELECT t.TableID 53 | FROM [vol].[Tables] t 54 | JOIN [vol].[LoadObservations] lo 55 | ON t.TableID = lo.TableID 56 | GROUP BY t.TableID 57 | HAVING AVG(lo.CurrentThreeSDLevel) = 0 58 | 59 | --Find Adverse Loads 60 | SELECT t.TableID 61 | FROM [vol].[Tables] t 62 | JOIN [vol].[LoadObservations] lo 63 | ON t.TableID = lo.TableID 64 | WHERE t.TableID NOT IN (SELECT TableID FROM #UnWantedTables) 65 | AND lo.DateOfCurrentObservation = @MostRecentDate 66 | AND lo.ChangeFromLastObservation > lo.CurrentThreeSDLevel 67 | --ORDER BY t.TableName 68 | 69 | IF @@RowCount > 0 70 | BEGIN 71 | EXEC msdb.dbo.sp_send_dbmail 72 | @profile_name = 'Monitoring', 73 | @recipients = @OperatorEmailAddress, 74 | @subject = 'Adverse Load Event', 75 | @body = 'One or more tables in the warehouse has experienced an anomalous load event. Check v_ShowAdverseTableLoads in the ODS database.' ; 76 | END 77 | 78 | --Remove Rarely loaded tables from 0 load analysis 79 | --INSERT INTO #UnWantedTables(TableID) 80 | --SELECT 135 81 | --UNION ALL 82 | --SELECT 131 83 | --UNION ALL 84 | --SELECT 118 85 | --UNION ALL 86 | --SELECT 123 87 | 88 | 89 | 90 | --Find tables not being loaded 91 | SELECT t.TableID 92 | FROM [vol].[Tables] t 93 | JOIN [vol].[LoadObservations] lo 94 | ON t.TableID = lo.TableID 95 | WHERE t.TableID NOT IN (SELECT TableID FROM #UnWantedTables) 96 | AND lo.DateOfCurrentObservation BETWEEN DATEADD(DD,-3,CURRENT_TIMESTAMP) AND CURRENT_TIMESTAMP 97 | GROUP BY t.TableID 98 | HAVING AVG(CAST(lo.ChangeFromLastObservation AS FLOAT)) = 0 99 | 100 | 101 | IF @@RowCount > 0 102 | BEGIN 103 | EXEC msdb.dbo.sp_send_dbmail 104 | @profile_name = 'Monitoring', 105 | @recipients = @OperatorEmailAddress, 106 | @subject = 'Adverse Load Event', 107 | @body = 'One or more tables in the warehouse have experienced three straight days of loading zero records. Run sp_DisplayTablesNotLoading in ODS database.'; 108 | END 109 | 110 | DROP TABLE #UnWantedTables 111 | 112 | END 113 | GO 114 | 115 | 116 | -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/03 Table Object Sample Scripts/sample dimension table.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: 3 | -- Create date: 4 | -- Description: 5 | -- Directions for use:Replace YourEDW with the name for 6 | -- your specific data warehouse. 7 | -- Replace YourDimensionName with the name of your dimension. 8 | -- Then fill in your columns. 
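-- For example (hypothetical columns for a customer dimension), the
-- column placeholder below could become:
--   [CustomerName] [nvarchar](255) NULL,
--   [CustomerCity] [nvarchar](100) NULL,
--   [CustomerSegment] [nvarchar](50) NULL,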
9 | -- ============================================= 10 | USE [YourEDW] 11 | 12 | DROP TABLE IF EXISTS [dw].[DimYourDimensionName] 13 | GO 14 | 15 | SET ANSI_NULLS ON 16 | GO 17 | 18 | SET QUOTED_IDENTIFIER ON 19 | GO 20 | 21 | CREATE TABLE [dw].[DimYourDimensionName]( 22 | [YourDimensionNameCK] [bigint] IDENTITY(1,1) NOT NULL, 23 | --your columns here. 24 | [CreatedBy] [nvarchar](50) NULL, 25 | [CreatedOn] [datetime] NULL, 26 | [UpdatedBy] [nvarchar](50) NULL, 27 | [UpdatedOn] [datetime] NULL, 28 | [SourceSystem] [nvarchar](100) NULL, 29 | [SourceSystemKey] [nvarchar](100) NULL, 30 | [EffectiveFrom] [datetime] NULL, 31 | [EffectiveTo] [datetime] NULL, 32 | [IsMostRecentRecord] [bit] NULL 33 | CONSTRAINT [PK_YourDimensionName] PRIMARY KEY CLUSTERED 34 | ( 35 | [YourDimensionNameCK] ASC 36 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] 37 | ) ON [PRIMARY] 38 | 39 | GO 40 | 41 | --index is optional 42 | DROP INDEX IF EXISTS [NCIDX_DimYourDimensionName_SourceSystemKey] ON [dw].[DimYourDimensionName] 43 | GO 44 | 45 | 46 | CREATE NONCLUSTERED INDEX [NCIDX_DimYourDimensionName_SourceSystemKey] ON [dw].[DimYourDimensionName] 47 | ( 48 | [SourceSystemKey] ASC 49 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY] 50 | GO 51 | 52 | -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/03 Table Object Sample Scripts/sample fact table.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: 3 | -- Create date: 4 | -- Description: 5 | -- Directions for use: Add the PK columns of the dimensions 6 | -- that are attached to the fact table, then add your measures 7 | -- and degenerate dimensions. Add the dimension keys to the UniqueDims hash. 8 | -- Add unique rows if necessary. 9 | -- ============================================= 10 | 11 | USE YourEDW 12 | 13 | DROP TABLE IF EXISTS [dw].[FactYourFactTableName] 14 | GO 15 | 16 | 17 | SET ANSI_NULLS ON 18 | GO 19 | 20 | SET QUOTED_IDENTIFIER ON 21 | GO 22 | 23 | SET ARITHABORT ON 24 | GO 25 | 26 | CREATE TABLE [dw].[FactYourFactTableName]( 27 | [RowID] [bigint] IDENTITY(1,1) NOT NULL, 28 | --dimension keys 29 | [TransactionID] NVARCHAR(100) NULL, 30 | --your columns. 
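	--e.g. (hypothetical measures and degenerate dimensions):
	--[Quantity] [int] NULL,
	--[SaleAmount] [decimal](18,2) NULL,
	--Remember to swap [DimKey1CK]/[DimKey2CK] in the UniqueDims hash
	--below for your real dimension key columns.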
31 | [CreatedOn] [datetime] NULL, 32 | [CreatedBy] [nvarchar](50) NULL, 33 | [UpdatedOn] [datetime] NULL, 34 | [UpdatedBy] [nvarchar](50) NULL, 35 | [SourceSystem] [nvarchar](50) NULL, 36 | [UniqueDims] AS (CONVERT([varbinary](35),hashbytes('SHA1',concat( 37 | CONVERT([nvarchar](35),[DimKey1CK],(0)), 38 | CONVERT([nvarchar](35),[DimKey2CK],(0)), 39 | CONVERT([nvarchar](35),[TransactionID],(0)) 40 | )),(0))) PERSISTED, 41 | --unique rows is optional 42 | [UniqueRows] AS (CONVERT([varbinary](35),hashbytes('SHA1',concat( 43 | CONVERT([nvarchar](35),[Column1],(0)), 44 | CONVERT([nvarchar](35),[Column2],(0)) 45 | )),(0))) PERSISTED, 46 | CONSTRAINT [PK_FactYourFactTableName] PRIMARY KEY CLUSTERED 47 | ( 48 | [RowID] ASC 49 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY], 50 | CONSTRAINT [IX_FactYourFactTableName] UNIQUE NONCLUSTERED 51 | ( 52 | [UniqueDims] ASC 53 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] 54 | ) ON [PRIMARY] 55 | 56 | --index is optional 57 | DROP INDEX IF EXISTS [NCIDX_FactYourFactTableName_UniqueDims] ON [dw].[FactYourFactTableName] 58 | GO 59 | 60 | 61 | CREATE NONCLUSTERED INDEX [NCIDX_FactYourFactTableName_UniqueDims] ON [dw].[FactYourFactTableName] 62 | ( 63 | [UniqueDims] ASC 64 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY] 65 | GO 66 | 67 | --index is optional 68 | DROP INDEX IF EXISTS [NCIDX_FactYourFactTableName_UniqueRows] ON [dw].[FactYourFactTableName] 69 | GO 70 | 71 | 72 | CREATE NONCLUSTERED INDEX [NCIDX_FactYourFactTableName_UniqueRows] ON [dw].[FactYourFactTableName] 73 | ( 74 | [UniqueRows] ASC 75 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY] 76 | GO -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/03 Table Object Sample Scripts/sample indexed view.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: 3 | -- Create date: 4 | -- Description: 5 | -- Directions for use: Replace YourEDW with the name 6 | -- of your EDW. Drop in your select statement from your 7 | -- dimension devoid of audit columns. 8 | -- Replace YourDimensionName with your dimension 9 | -- Replace RenameCK with your renamed contrived key. 10 | -- ============================================= 11 | 12 | USE YourEDW 13 | 14 | GO 15 | DROP VIEW IF EXISTS DimYourDimensionName 16 | GO 17 | 18 | CREATE VIEW DimYourDimensionName 19 | 20 | WITH SCHEMABINDING 21 | AS 22 | SELECT 23 | YourDimensionNameCK, 24 | Column1, 25 | Column2, 26 | ColumnN, 27 | IsMostRecentRow 28 | FROM [dw].[DimYourDimensionName] 29 | GO 30 | --Create an index on the view. 
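--Note: SQL Server only materializes the view once this unique clustered
--index exists, and creating it generally requires the standard indexed
--view session settings (QUOTED_IDENTIFIER ON, ANSI_NULLS ON, etc.).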
31 | CREATE UNIQUE CLUSTERED INDEX CIDX_DimYourDimensionName_YourDimensionNameCK ON DimYourDimensionName(YourDimensionNameCK); 32 | GO 33 | -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/03 Table Object Sample Scripts/sample junk dimension table.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: 3 | -- Create date: 4 | -- Description: 5 | -- Directions for use: 6 | -- Replace YourEDW with the name of your specific EDW. 7 | -- Insert your columns at line 15. 8 | -- Replace YourDimensionName with the name of your dimension. 9 | -- Put your columns in the row hash being careful to change the parameter on the datatype to fit the column. 10 | -- ============================================= 11 | 12 | USE YourEDW 13 | 14 | DROP TABLE IF EXISTS [dw].[DimYourDimensionName] 15 | GO 16 | 17 | SET ANSI_NULLS ON 18 | GO 19 | 20 | SET QUOTED_IDENTIFIER ON 21 | GO 22 | 23 | 24 | CREATE TABLE [dw].[DimYourDimensionName]( 25 | [YourDimensionNameCK] [bigint] IDENTITY(1,1) NOT NULL, 26 | --your columns. 27 | [CreatedBy] [nvarchar](50) NULL, 28 | [CreatedOn] [datetime] NULL, 29 | [UpdatedBy] [nvarchar](50) NULL, 30 | [UpdatedOn] [datetime] NULL, 31 | [SourceSystem] [nvarchar](100) NULL, 32 | [SourceSystemKey] [nvarchar](100) NULL, 33 | [EffectiveFrom] [datetime] NULL, 34 | [EffectiveTo] [datetime] NULL, 35 | [IsMostRecentRecord] [bit] NULL, 36 | [RowHash] AS (CONVERT([binary](16),hashbytes('MD5',concat( 37 | CONVERT([nvarchar](35),Column1,0), 38 | CONVERT([nvarchar](35),Column2,0), 39 | CONVERT([nvarchar](35),Column3,0), 40 | CONVERT([nvarchar](35),Column4,0), 41 | CONVERT([nvarchar](35),Column5,0), 42 | CONVERT([nvarchar](35),Column6,0))),0)) PERSISTED, 43 | CONSTRAINT [PK_YourDimensionName] PRIMARY KEY CLUSTERED 44 | ( 45 | [YourDimensionNameCK] ASC 46 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] 47 | ) ON [PRIMARY] 48 | END 49 | 50 | --index is optional 51 | DROP INDEX IF EXISTS [NCIDX_FactYourFactTableName_RowHash] ON [dw].[FactYourFactTableName] 52 | GO 53 | 54 | 55 | CREATE NONCLUSTERED INDEX [NCIDX_FactYourFactTableName_RowHash] ON [dw].[FactYourFactTableName] 56 | ( 57 | [RowHash] ASC 58 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY] 59 | GO 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/03 Table Object Sample Scripts/sample master data management table.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: 3 | -- Create date: 4 | -- Description: 5 | -- Directions for use: 6 | -- 1. Replace YourDimensionName with the name of your specific master data management table. 7 | -- 1. Replace the source system key place holder columns with your source system keys. 8 | -- 1. Replace the source system key place holder columns with your source system keys in the row hash. 
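-- For example (hypothetical source systems), a customer master keyed on
-- a CRM ID and an ERP ID would rename SourceSystemKey1/SourceSystemKey2
-- to [CRMCustomerID]/[ERPCustomerID] in both the column list and the
-- RowHash definition.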
9 | -- ============================================= 10 | 11 | USE ODS 12 | GO 13 | 14 | DROP INDEX IF EXISTS [NCIDX_YourDimensionNameMasterData_RowHash] ON [mdm].[YourDimensionNameMasterData] 15 | GO 16 | 17 | SET ARITHABORT ON 18 | SET CONCAT_NULL_YIELDS_NULL ON 19 | SET QUOTED_IDENTIFIER ON 20 | SET ANSI_NULLS ON 21 | SET ANSI_PADDING ON 22 | SET ANSI_WARNINGS ON 23 | SET NUMERIC_ROUNDABORT OFF 24 | GO 25 | 26 | DROP TABLE IF EXISTS [mdm].[YourDimensionNameMasterData] 27 | GO 28 | 29 | 30 | SET ANSI_NULLS ON 31 | GO 32 | 33 | SET QUOTED_IDENTIFIER ON 34 | GO 35 | 36 | CREATE TABLE [mdm].[YourDimensionNameMasterData]( 37 | [YourDimensionNameEK] [bigint] IDENTITY(1000,1) NOT NULL, 38 | [SourceSystemKey1] [nvarchar](50) NOT NULL, 39 | [SourceSystemKey2] [nvarchar](50) NOT NULL, 40 | [RowHash] AS (CONVERT([binary](35),hashbytes('SHA1',concat( 41 | CONVERT([nvarchar](35),[SourceSystemKey1],(0)), 42 | CONVERT([nvarchar](35),[SourceSystemKey2],(0)) 43 | )),(0))) PERSISTED, 44 | CONSTRAINT [PK_YourDimensionNameMasterData] PRIMARY KEY CLUSTERED 45 | ( 46 | [YourDimensionNameEK] ASC 47 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY] 48 | ) ON [PRIMARY] 49 | GO 50 | 51 | 52 | 53 | 54 | CREATE UNIQUE NONCLUSTERED INDEX [NCIDX_YourDimensionNameMasterData_RowHash] ON [mdm].[YourDimensionNameMasterData] 55 | ( 56 | [RowHash] ASC 57 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY] 58 | GO 59 | 60 | -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/03 Table Object Sample Scripts/sample stage table.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: 3 | -- Create date: 4 | -- Description: 5 | -- Directions for use: 6 | -- Replace YourSchemaName with the schema for your stage table. 7 | -- YourStageTableName with the name of your staging table. 8 | -- Drop your columns in row 16. 9 | -- Replace DatabaseName with the name of the source system. 10 | -- ============================================= 11 | USE ODS 12 | 13 | DROP TABLE IF EXISTS YourSchemaName.YourStageTableNameData 14 | GO 15 | 16 | 17 | SET ANSI_NULLS ON 18 | GO 19 | 20 | SET QUOTED_IDENTIFIER ON 21 | GO 22 | 23 | 24 | CREATE TABLE YourSchemaName.YourStageTableNameData( 25 | [ETLKey] [uniqueidentifier] NOT NULL, 26 | --your columns here. 
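	--e.g. (hypothetical feed columns; stage columns are typically kept
	--as wide nvarchar and cleansed downstream):
	--[OrderNumber] [nvarchar](255) NULL,
	--[OrderDate] [nvarchar](255) NULL,
	--[OrderAmount] [nvarchar](255) NULL,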
27 | [UniqueDims] [varbinary](35) NULL, 28 | [UniqueRows] [varbinary](16) NULL, 29 | [SourceSystem] [nvarchar](255) NULL, 30 | [Cleansed] [bit] NULL, 31 | [ErrorRecord] [bit] NULL, 32 | [ErrorReason] [nvarchar](255) NULL, 33 | [Processed] [bit] NULL, 34 | [RunDate] [datetime] NULL, 35 | CONSTRAINT [PK_YourStageTableNameData] PRIMARY KEY CLUSTERED 36 | ( 37 | [ETLKey] ASC 38 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] 39 | ) ON [PRIMARY] 40 | 41 | GO 42 | 43 | ALTER TABLE [YourSchemaName].[YourStageTableNameData] ADD CONSTRAINT [DF_YourStageTableNameData_ETLKey] DEFAULT (newid()) FOR [ETLKey] 44 | GO 45 | 46 | ALTER TABLE [YourSchemaName].[YourStageTableNameData] ADD CONSTRAINT [DF_YourStageTableNameData_SourceSystem] DEFAULT (N'DatabaseName') FOR [SourceSystem] 47 | GO 48 | 49 | ALTER TABLE [YourSchemaName].[YourStageTableNameData] ADD CONSTRAINT [DF_YourStageTableNameData_Cleansed] DEFAULT ((0)) FOR [Cleansed] 50 | GO 51 | 52 | ALTER TABLE [YourSchemaName].[YourStageTableNameData] ADD CONSTRAINT [DF_YourStageTableNameData_ErrorRecord] DEFAULT ((0)) FOR [ErrorRecord] 53 | GO 54 | 55 | ALTER TABLE [YourSchemaName].[YourStageTableNameData] ADD CONSTRAINT [DF_YourStageTableNameData_Processed] DEFAULT ((0)) FOR [Processed] 56 | GO 57 | 58 | ALTER TABLE [YourSchemaName].[YourStageTableNameData] ADD CONSTRAINT [DF_YourStageTableNameData_RunDate] DEFAULT (getdate()) FOR [RunDate] 59 | GO 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/04 Data Processing Sample Scripts/batch processing.sql: -------------------------------------------------------------------------------- 1 | --Split a batch up into chunks using a cursor. 2 | --This method can be used for most any large table with some modifications 3 | --It could also be refined further with an @Day variable (for example) 4 | --This is for the case where you already have the data in a table 5 | --and are ready to process it. Obviously, if you have a large flat file, 6 | --you can just use BULK INSERT. 
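--A minimal sketch (hypothetical table and column names) of what the
--loop body below might contain:
--  INSERT INTO dbo.TargetTable(SomeDateField, SomeMeasure)
--  SELECT SomeDateField, SomeMeasure
--  FROM [Sometable]
--  WHERE YEAR([SomeDateField]) = @Year AND MONTH([SomeDateField]) = @Month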
7 | 8 | 9 | DECLARE @Year INT 10 | DECLARE @Month INT 11 | 12 | DECLARE BatchingCursor CURSOR FOR 13 | SELECT DISTINCT YEAR([SomeDateField]),MONTH([SomeDateField]) 14 | FROM [Sometable]; 15 | 16 | 17 | OPEN BatchingCursor; 18 | FETCH NEXT FROM BatchingCursor INTO @Year, @Month; 19 | WHILE @@FETCH_STATUS = 0 20 | BEGIN 21 | 22 | --the transaction is optional 23 | BEGIN TRANSACTION 24 | --All logic goes in here 25 | --Any select statements from [Sometable] need to be suffixed with: 26 | --WHERE Year([SomeDateField])=@Year AND Month([SomeDateField])=@Month 27 | COMMIT TRANSACTION 28 | 29 | FETCH NEXT FROM BatchingCursor INTO @Year, @Month; 30 | END; 31 | CLOSE BatchingCursor; 32 | DEALLOCATE BatchingCursor; 33 | GO 34 | -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/04 Data Processing Sample Scripts/bulk insert.sql: -------------------------------------------------------------------------------- 1 | --Insert into the view 2 | BULK INSERT YourSchema.YourTable 3 | FROM 'E:\YourFileName.csv' 4 | WITH ( 5 | FIELDTERMINATOR = ',', 6 | ROWTERMINATOR = '0x0a', 7 | FIRSTROW = 2 8 | ); -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/04 Data Processing Sample Scripts/data cleansing.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: 3 | -- Create date: 4 | -- Description: 5 | -- Usage: 6 | -- ============================================= 7 | 8 | USE [ODS] 9 | GO 10 | 11 | 12 | DROP PROCEDURE [dbo].[usp_CleanYourStageTableNameData] 13 | GO 14 | 15 | 16 | SET ANSI_NULLS ON 17 | GO 18 | 19 | SET QUOTED_IDENTIFIER ON 20 | GO 21 | 22 | 23 | CREATE PROCEDURE [dbo].[usp_CleanYourStageTableNameData] 24 | AS 25 | BEGIN 26 | 27 | BEGIN TRANSACTION 28 | 29 | 30 | --Convert the rest of the dates into YYYYMMDD format 31 | UPDATE [YourSchemaName].[YourStageTableNameData] 32 | SET [YourDateColumn] = udf_CleanDate([Date]) 33 | 34 | --Remove CR from volume data 35 | UPDATE [YourSchemaName].[YourStageTableNameData] 36 | SET YourColumn = REPLACE(YourColumn, CHAR(13) + CHAR(10), '') 37 | 38 | UPDATE [YourSchemaName].[YourStageTableNameData] 39 | SET [YourJoinColumn] = '' 40 | WHERE [YourJoinColumn] IS NULL 41 | 42 | 43 | UPDATE [YourSchemaName].[YourStageTableNameData] 44 | SET Cleansed = 1 45 | 46 | 47 | 48 | 49 | 50 | COMMIT TRANSACTION 51 | 52 | END 53 | 54 | 55 | 56 | GO 57 | 58 | 59 | -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/04 Data Processing Sample Scripts/fact table load.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: 3 | -- Create date: 4 | -- Description: 5 | -- ============================================= 6 | 7 | USE ODS 8 | 9 | GO 10 | 11 | IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[sp_ProcessFactYourFactTable]') AND type in (N'P', N'PC')) 12 | DROP PROCEDURE [dbo].[usp_ProcessFactYourFactTable] 13 | GO 14 | 15 | 16 | SET ANSI_NULLS ON 17 | GO 18 | 19 | SET QUOTED_IDENTIFIER ON 20 | GO 21 | 22 | CREATE PROCEDURE usp_ProcessFactYourFactTable 23 | AS 24 | BEGIN 25 | 26 | 27 | IF OBJECT_ID('tempdb..#DimYourDimensionTable') IS NOT NULL DROP TABLE #DimYourDimensionTable 28 | IF OBJECT_ID('tempdb..#DimYourJunkDimension') IS NOT NULL DROP TABLE #DimYourJunkDimension 29 | 30 | CREATE TABLE 
#DimYourDimensionTable(DimCK BIGINT, SourceSystemKey NVARCHAR(50), ETLKey UNIQUEIDENTIFIER) 31 | CREATE TABLE #DimYourJunkDimension(DimCK BIGINT, RowHash BINARY(16), ETLKey UNIQUEIDENTIFIER) 32 | 33 | 34 | CREATE CLUSTERED INDEX CIDX_1 ON #DimYourDimensionTable(ETLKey) 35 | CREATE CLUSTERED INDEX CIDX_2 ON #DimYourJunkDimension(ETLKey) 36 | 37 | 38 | CREATE NONCLUSTERED INDEX NCIDX_1 ON #DimYourDimensionTable(SourceSystemKey) 39 | CREATE NONCLUSTERED INDEX NCIDX_2 ON #DimYourJunkDimension(RowHash) 40 | 41 | 42 | BEGIN TRANSACTION 43 | ---- 44 | 45 | INSERT INTO #DimYourDimensionTable(SourceSystemKey, ETLKey) 46 | SELECT KeyColumn, ETLKey 47 | FROM StageTable 48 | WHERE Processed = 0 49 | AND [ErrorRecord] = 0 50 | 51 | 52 | UPDATE tt 53 | SET tt.DimCK = cmt.YourDimensionTableCK 54 | FROM #DimYourDimensionTable tt 55 | JOIN [cm].DimYourDimensionTable cmt 56 | ON tt.SourceSystemKey = cmt.SourceSystemKey 57 | 58 | 59 | --- 60 | INSERT INTO #DimYourJunkDimension(RowHash, ETLKey) 61 | SELECT HASHBYTES('MD5',ColumnName), ETLKey 62 | FROM StageTable 63 | WHERE Processed = 0 64 | AND [ErrorRecord] = 0 65 | 66 | UPDATE tt 67 | SET tt.DimCK = cmt.YourJunkDimensionCK 68 | FROM #DimYourJunkDimension tt 69 | JOIN cm.DimYourJunkDimension cmt 70 | ON tt.RowHash = cmt.RowHash 71 | 72 | ---- 73 | 74 | 75 | TRUNCATE TABLE [cm].[FactYourFactTable] 76 | 77 | 78 | 79 | INSERT INTO [cm].[FactYourFactTable]( 80 | [ETLKey], 81 | YourDimensionTableCK, 82 | YourJunkDimensionCK, 83 | --your columns 84 | [SourceSystem] 85 | ) 86 | SELECT 87 | p.[ETLKey], 88 | a.DimCK, 89 | b.DimCK, 90 | --your columns 91 | p.SourceSystem 92 | FROM YourStageTable p 93 | JOIN #DimYourDimensionTable a 94 | ON a.ETLKey = p.ETLKey 95 | JOIN #DimYourJunkDimension b 96 | ON b.ETLKey = p.ETLKey 97 | WHERE p.Processed = 0 98 | AND p.[ErrorRecord] = 0 99 | 100 | --Update staging so you can match to production later. 
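--(usp_MarkRecordsAsProcessed later joins the stage table to the
--production fact table on UniqueDims, so the computed hash is copied
--back onto the stage rows here.)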
101 | UPDATE st 102 | SET st.[UniqueDims] = cmt.[UniqueDims] 103 | FROM YourStageTable st 104 | JOIN YourFactTable cmt 105 | ON st.ETLKey = cmt.ETLKey 106 | 107 | 108 | MERGE [FactYourFactTable] AS target 109 | USING( 110 | SELECT 111 | YourDimensionTableCK, 112 | YourJunkDimensionCK, 113 | --your columns 114 | [SourceSystem], 115 | [UniqueDims], 116 | FROM [cm].[FactYourFactTable] 117 | ) AS source 118 | ON target.[UniqueDims] = source.[UniqueDims] 119 | 120 | WHEN NOT MATCHED THEN 121 | 122 | INSERT( 123 | YourDimensionTableCK, 124 | YourJunkDimensionCK, 125 | --your columns 126 | [SourceSystem], 127 | [CreatedOn], 128 | [CreatedBy] 129 | ) 130 | VALUES( 131 | source.YourDimensionTableCK, 132 | source.YourJunkDimensionCK, 133 | --your columns 134 | source.[Volume], 135 | source.[SourceSystem], 136 | CURRENT_TIMESTAMP, 137 | SYSTEM_USER 138 | ) 139 | 140 | WHEN MATCHED 141 | THEN 142 | UPDATE 143 | SET 144 | YourColumns = source.YourColumns, 145 | [SourceSystem] = source.SourceSystem, 146 | [UpdatedBy] = SYSTEM_USER, 147 | [UpdatedOn] = CURRENT_TIMESTAMP 148 | ; 149 | 150 | 151 | 152 | 153 | 154 | 155 | COMMIT TRANSACTION 156 | 157 | 158 | DROP TABLE #DimYourDimensionTable 159 | 160 | 161 | END -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/04 Data Processing Sample Scripts/remove dups.sql: -------------------------------------------------------------------------------- 1 | --Full instructions can be found at: https://tutorials.massstreetuniversity.com/transact-sql/solutions/removing-dups.html 2 | 3 | --First we need keep track of the dups 4 | --If you have a large amount of dups, use a 5 | --temp table instead. 6 | DECLARE @DuplicateIDs TABLE(RowNumber INT, ID BIGINT, TextDescription NVARCHAR(50)) 7 | 8 | INSERT INTO @DuplicateIDs(RowNumber, ID, TextDescription) 9 | SELECT ROW_NUMBER() OVER (PARTITION BY TextDescription ORDER BY TextDescription) AS RowNumber, ID, TextDescription 10 | FROM DimYourDimensionTable -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/04 Data Processing Sample Scripts/type I dimension processing.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: 3 | -- Create date: 4 | -- Description: 5 | -- ============================================= 6 | 7 | USE ODS 8 | 9 | GO 10 | 11 | DROP PROCEDURE IF EXISTS dbo.usp_ProcessDimYourDimensionName 12 | GO 13 | 14 | 15 | SET ANSI_NULLS ON 16 | GO 17 | 18 | SET QUOTED_IDENTIFIER ON 19 | GO 20 | 21 | CREATE PROCEDURE usp_ProcessDimYourDimensionName 22 | AS 23 | BEGIN 24 | 25 | DECLARE @LowDate AS DATETIME 26 | DECLARE @HighDate AS DATETIME 27 | 28 | SELECT @HighDate = CAST(MAX(DateCK) AS NCHAR(8)) 29 | FROM DimDate 30 | WHERE DateCK NOT IN (00000000,11111111) 31 | 32 | SELECT @LowDate = CAST(MIN(DateCK) AS NCHAR(8)) 33 | FROM DimDate 34 | WHERE DateCK NOT IN (00000000,11111111) 35 | 36 | TRUNCATE TABLE [cm].[DimYourDimensionName] 37 | 38 | BEGIN TRANSACTION 39 | 40 | BEGIN TRANSACTION 41 | 42 | INSERT INTO cm.DimYourDimensionName( 43 | --your columns here 44 | [SourceSystem], 45 | [SourceSystemKey] 46 | ) 47 | SELECT DISTINCT 48 | --your columns here 49 | FROM YourStageTable 50 | 51 | 52 | 53 | 54 | MERGE DimYourDimensionName AS target 55 | USING ( 56 | SELECT 57 | --your columns here 58 | [SourceSystem], 59 | [SourceSystemKey] 60 | FROM cm.DimYourDimensionName 61 | ) AS source 62 | ON (target.SourceSystemKey COLLATE 
DATABASE_DEFAULT = source.SourceSystemKey COLLATE DATABASE_DEFAULT) 63 | 64 | WHEN NOT MATCHED THEN 65 | INSERT ( 66 | --your columns here 67 | [SourceSystem], 68 | [SourceSystemKey], 69 | EffectiveFrom, 70 | EffectiveTo, 71 | IsMostRecentRecord, 72 | CreatedBy, 73 | CreatedOn 74 | ) 75 | VALUES ( 76 | --your columns here 77 | [SourceSystem], 78 | [SourceSystemKey], 79 | @LowDate, 80 | @HighDate, 81 | 1, 82 | SYSTEM_USER, 83 | CURRENT_TIMESTAMP 84 | ) 85 | 86 | WHEN MATCHED 87 | 88 | THEN 89 | UPDATE 90 | SET 91 | --your columns here 92 | target.[YourColumn] = source.[YourColumn], 93 | CreatedBy = SYSTEM_USER, 94 | CreatedOn = CURRENT_TIMESTAMP 95 | ; 96 | 97 | TRUNCATE TABLE cm.DimYourDimensionName 98 | 99 | INSERT INTO cm.DimYourDimensionName( 100 | YourDimensionNameCK, 101 | SourceSystemKey 102 | ) 103 | SELECT 104 | YourDimensionNameCK, 105 | SourceSystemKey 106 | FROM DimYourDimensionName 107 | 108 | 109 | 110 | 111 | COMMIT TRANSACTION 112 | 113 | END 114 | -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/04 Data Processing Sample Scripts/type II dimension processing.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: 3 | -- Create date: 4 | -- Description: 5 | -- Usage: https://tutorials.massstreetuniversity.com/transact-sql/solutions/load-typeII-dimension.html 6 | -- ============================================= 7 | 8 | USE ODS 9 | 10 | GO 11 | 12 | 13 | DROP PROCEDURE IF EXISTS dbo.usp_ProcessDimYourDimensionName 14 | GO 15 | 16 | 17 | SET ANSI_NULLS ON 18 | GO 19 | 20 | SET QUOTED_IDENTIFIER ON 21 | GO 22 | 23 | CREATE PROCEDURE usp_ProcessDimYourDimensionName 24 | AS 25 | BEGIN 26 | 27 | 28 | DECLARE @LowDate AS DATETIME 29 | DECLARE @HighDate AS DATETIME 30 | 31 | SELECT @HighDate = CAST(MAX(DateCK) AS NCHAR(8)) 32 | FROM DimDate 33 | WHERE DateCK NOT IN (00000000,11111111) 34 | 35 | SELECT @LowDate = CAST(MIN(DateCK) AS NCHAR(8)) 36 | FROM DimDate 37 | WHERE DateCK NOT IN (00000000,11111111) 38 | 39 | 40 | 41 | IF OBJECT_ID('tempdb..#DimYourDimensionName') IS NOT NULL DROP TABLE #DimYourDimensionName 42 | 43 | CREATE TABLE #DimYourDimensionName( 44 | [YourDimensionNameCK] [bigint] NULL, 45 | --your columns here 46 | [CreatedBy] [nvarchar](50) NULL, 47 | [CreatedOn] [datetime] NULL, 48 | [UpdatedBy] [nvarchar](50) NULL, 49 | [UpdatedOn] [datetime] NULL, 50 | [SourceSystem] [nvarchar](100) NULL, 51 | [SourceSystemKey] [nvarchar](100) NULL, 52 | [EffectiveFrom] [datetime] NULL, 53 | [EffectiveTo] [datetime] NULL, 54 | [IsMostRecentRecord] [bit] NULL 55 | ) 56 | 57 | 58 | TRUNCATE TABLE cm.DimYourDimensionName 59 | 60 | BEGIN TRANSACTION 61 | 62 | --Handle new records 63 | INSERT INTO cm.DimYourDimensionName( 64 | --your columns here 65 | [SourceSystem], 66 | [SourceSystemKey] 67 | ) 68 | SELECT 69 | DISTINCT 70 | [Name], 71 | [Code], 72 | [IsIntraday], 73 | [TimeZone], 74 | [Suffix], 75 | [Currency], 76 | [Country], 77 | [SourceSystem], 78 | [SourceSystemKey] 79 | FROM [YourSchemaName].[YourStageTable] 80 | WHERE Processed = 0 81 | 82 | MERGE DimYourDimensionName AS target 83 | USING ( 84 | SELECT 85 | --your columns here 86 | [SourceSystem], 87 | [SourceSystemKey], 88 | --RowHash 89 | FROM cm.DimYourDimensionName 90 | ) AS source 91 | ON target.[SourceSystemKey] = source.[SourceSystemKey] 92 | 93 | WHEN NOT MATCHED THEN 94 | INSERT ( 95 | --your columns here 96 | [SourceSystem], 97 | [SourceSystemKey], 98 | EffectiveFrom, 99 
| EffectiveTo, 100 | IsMostRecentRecord, 101 | CreatedBy, 102 | CreatedOn 103 | ) 104 | VALUES ( 105 | --your columns here 106 | [SourceSystem], 107 | [SourceSystemKey], 108 | @LowDate, 109 | @HighDate, 110 | 1, 111 | SYSTEM_USER, 112 | CURRENT_TIMESTAMP 113 | ) 114 | ; 115 | 116 | --Handle changed records 117 | INSERT INTO #DimYourDimensionName( 118 | --your columns here 119 | [SourceSystem], 120 | [SourceSystemKey], 121 | EffectiveFrom, 122 | EffectiveTo, 123 | IsMostRecentRecord, 124 | CreatedBy, 125 | CreatedOn 126 | ) 127 | SELECT 128 | --your columns here 129 | [SourceSystem], 130 | [SourceSystemKey], 131 | EffectiveFrom, 132 | EffectiveTo, 133 | IsMostRecentRecord, 134 | CreatedBy, 135 | CreatedOn 136 | FROM( 137 | MERGE DimYourDimensionName AS target 138 | USING ( 139 | SELECT 140 | --your columns here 141 | [SourceSystem], 142 | [SourceSystemKey], 143 | --RowHash 144 | FROM cm.DimYourDimensionName 145 | ) AS source 146 | ON target.[SourceSystemKey] = source.[SourceSystemKey] 147 | WHEN MATCHED 148 | --AND source.RowHash <> target.RowHash 149 | AND target.IsMostRecentRecord = 1 150 | THEN 151 | UPDATE 152 | SET 153 | [UpdatedBy] = SYSTEM_USER, 154 | [UpdatedOn] = CURRENT_TIMESTAMP, 155 | EffectiveTo = DATEADD(ss,-1,CURRENT_TIMESTAMP), 156 | IsMostRecentRecord = 0 157 | OUTPUT 158 | $action Action_Out, 159 | --your columns here 160 | source.[SourceSystem], 161 | source.[SourceSystemKey], 162 | CURRENT_TIMESTAMP AS EffectiveFrom, 163 | @HighDate AS EffectiveTo, 164 | 1 AS IsMostRecentRecord, 165 | SYSTEM_USER AS CreatedBy, 166 | CURRENT_TIMESTAMP AS CreatedOn 167 | ) AS MERGE_OUT 168 | WHERE MERGE_OUT.Action_Out = 'UPDATE' 169 | ; 170 | 171 | INSERT INTO DimYourDimensionName( 172 | --your columns here 173 | [SourceSystem], 174 | [SourceSystemKey], 175 | EffectiveFrom, 176 | EffectiveTo, 177 | IsMostRecentRecord, 178 | CreatedBy, 179 | CreatedOn 180 | ) 181 | SELECT 182 | --your columns here 183 | [SourceSystem], 184 | [SourceSystemKey], 185 | EffectiveFrom, 186 | EffectiveTo, 187 | IsMostRecentRecord, 188 | CreatedBy, 189 | CreatedOn 190 | FROM #DimYourDimensionName 191 | 192 | 193 | TRUNCATE TABLE cm.DimYourDimensionName 194 | 195 | INSERT INTO cm.DimYourDimensionName( 196 | [YourDimensionNameCK], 197 | [SourceSystemKey] 198 | ) 199 | SELECT 200 | [YourDimensionNameCK], 201 | [SourceSystemKey] 202 | FROM DimYourDimensionName 203 | WHERE IsMostRecentRecord = 1 204 | 205 | COMMIT TRANSACTION 206 | 207 | DROP TABLE #DimYourDimensionName 208 | 209 | END -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/05 Helper Scripts/add rowhash.sql: -------------------------------------------------------------------------------- 1 | --Adding a RowHash on a TypeII dimension 2 | 3 | 4 | BEGIN TRANSACTION 5 | 6 | DROP INDEX IF EXISTS [Index Name] ON YourSchema.DimYourDimension 7 | 8 | ALTER TABLE YourSchema.DimYourDimension DROP COLUMN IF EXISTS RowHash 9 | 10 | ALTER TABLE YourSchema.DimYourDimension ADD [RowHash] AS (CONVERT([varbinary](16),hashbytes('MD5',concat( 11 | CONVERT([nvarchar](35),Column1,0), 12 | CONVERT([nvarchar](35),Column2,0), 13 | CONVERT([nvarchar](35),Column3,0), 14 | CONVERT([nvarchar](35),Column4,0), 15 | CONVERT([nvarchar](35),Column5,0), 16 | CONVERT([nvarchar](35),Column6,0) 17 | )),0)) PERSISTED 18 | 19 | CREATE NONCLUSTERED INDEX [Index Name] ON YourSchema.DimYourDimension(RowHash) 20 | 21 | COMMIT TRANSACTION 22 | -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/05 
Helper Scripts/change collation.sql: -------------------------------------------------------------------------------- 1 | USE ODS 2 | 3 | BEGIN TRANSACTION 4 | 5 | USE EDW 6 | 7 | DROP INDEX IF EXISTS [NCINDEX_YourDimensionName_ROWHASH] ON [dw].[DimYourDimensionName] 8 | 9 | ALTER TABLE [dw].[DimYourDimensionName] DROP COLUMN RowHash 10 | 11 | ALTER TABLE dw.[DimYourDimensionName] ALTER COLUMN [YourDimensionName] [nvarchar](MAX) COLLATE SQL_Latin1_General_CP1_CS_AS 12 | 13 | ALTER TABLE dw.[DimYourDimensionName] ADD RowHash AS (CONVERT([varbinary](16),hashbytes('MD5',concat( 14 | CONVERT([nvarchar](35),Column1,(0)), 15 | CONVERT([nvarchar](35),Column2,(0)), 16 | CONVERT([nvarchar](35),Column3,(0)), 17 | CONVERT([nvarchar](35),Column4,(0)) 18 | )))) PERSISTED 19 | 20 | CREATE NONCLUSTERED INDEX [NCINDEX_YourDimensionName_ROWHASH] ON [dw].[DimYourDimensionName](RowHash) 21 | 22 | USE ODS 23 | 24 | DROP INDEX IF EXISTS [NCINDEX_YourDimensionName_ROWHASH] ON cm.[DimYourDimensionName] 25 | 26 | ALTER TABLE cm.[DimYourDimensionName] DROP COLUMN RowHash 27 | 28 | ALTER TABLE cm.[DimYourDimensionName] ALTER COLUMN [YourDimensionName] [nvarchar](MAX) COLLATE SQL_Latin1_General_CP1_CS_AS 29 | 30 | ALTER TABLE cm.[DimYourDimensionName] ADD RowHash AS (CONVERT([varbinary](16),hashbytes('MD5',concat( 31 | CONVERT([nvarchar](35),Column1,(0)), 32 | CONVERT([nvarchar](35),Column2,(0)), 33 | CONVERT([nvarchar](35),Column3,(0)), 34 | CONVERT([nvarchar](35),Column4,(0)) 35 | )))) PERSISTED 36 | 37 | 38 | CREATE NONCLUSTERED INDEX [NCINDEX_YourDimensionName_ROWHASH] ON cm.[DimYourDimensionName](RowHash) 39 | 40 | ALTER TABLE cp.OrdersData ALTER COLUMN YourDimensionNames [nvarchar](MAX) COLLATE SQL_Latin1_General_CP1_CS_AS 41 | 42 | 43 | USE EDW 44 | 45 | -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/05 Helper Scripts/configuration insert sample script.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Create an insert statement for every variable that you create as a configuration value. 3 | The PackageName is the name of the package MINUS the package extension. 4 | PackageName functions as a filter that you select your variables on. 5 | */ 6 | 7 | USE SSISManagement 8 | 9 | --Substitute the name of your package. 10 | 11 | DELETE FROM SSISConfigurations 12 | WHERE PackageName = 'Package' 13 | 14 | INSERT INTO SSISConfigurations(PackageName, VariableName, VariableValue) 15 | SELECT 'Package', 'strVariable1', 'NewValue' 16 | UNION ALL 17 | SELECT 'Package', 'intVariable2', '1000' -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/05 Helper Scripts/documentation block.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 7Dec20 4 | -- Description: 5 | -- Change Log: 6 | -- 15Oct20 Created doc bloc sample. 
- BW 7 | -- ============================================= -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/05 Helper Scripts/populate fact table with fake data.sql: -------------------------------------------------------------------------------- 1 | -- ============================================= 2 | -- Author: Bob Wakefield 3 | -- Create date: 7Dec20 4 | -- Description: This script will populate a fact table with 5 | -- a random amount of records so you can test to see that 6 | -- usp_RecordRowCounts works properly. 7 | -- Change Log: 8 | -- ============================================= 9 | 10 | --set the range for where you want the final number of records to fall. 11 | DECLARE @random INT; 12 | DECLARE @upper INT; 13 | DECLARE @lower INT; 14 | SET @lower = 1 ---- The lowest random number 15 | SET @upper = 20 ---- The highest random number 16 | SELECT @random = ROUND(((@upper - @lower -1) * RAND() + @lower), 0) 17 | 18 | --This is an example using adventure works. Create an insert statement for whatever fact table you would like to load. 19 | DECLARE @counter smallint; 20 | SET @counter = 1; 21 | WHILE @counter < @random 22 | BEGIN 23 | INSERT INTO [AdventureWorksDW2012].[dbo].[FactFinance]([DateKey], [OrganizationKey], [DepartmentGroupKey], [ScenarioKey], [AccountKey], [Amount], [Date]) 24 | SELECT 20050701, 5, 7, 1, 21, 4358, CURRENT_TIMESTAMP 25 | SET @counter = @counter + 1 26 | END; 27 | GO 28 | -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/05 Helper Scripts/proc execution scripts.sql: -------------------------------------------------------------------------------- 1 | USE ODS 2 | 3 | --Your Model 4 | --Start Over 5 | TRUNCATE TABLE YourFactTable 6 | DELETE FROM DimYourDimTable 7 | 8 | --Pull Data 9 | EXEC usp_YourPullProcess 10 | 11 | --Clean Data 12 | EXEC usp_YourCleanProcess 13 | 14 | --Process Dims 15 | EXEC sp_ProcessDimExchanges 16 | 17 | --Process Facts 18 | EXEC sp_ProcessYourFactTable 19 | 20 | --Finalize and Audit 21 | EXEC sp_RecordRowCounts 22 | EXEC sp_MarkRecordsAsProcessed 23 | EXEC sp_CheckForUnprocessedRecords 24 | 25 | --Monitoring 26 | EXEC sp_TableLoadMonitoring 27 | EXEC sp_LoadTableLoadReportingTable 28 | 29 | -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/05 Helper Scripts/show all columns in database.sql: -------------------------------------------------------------------------------- 1 | USE YourEDW 2 | GO 3 | 4 | SELECT 5 | SCHEMA_NAME(t.schema_id) AS [Schema], 6 | t.name AS [Table Name], 7 | c.name AS [Column Name], 8 | CASE 9 | WHEN ty.name = 'char' OR ty.name = 'nvarchar' THEN CONCAT(ty.name,'(',ty.max_length,')') 10 | WHEN ty.name = 'numeric' THEN CONCAT(ty.name,'(',ty.precision,',',ty.scale,')') 11 | ELSE ty.name 12 | END AS [Data Type] 13 | FROM sys.tables AS t 14 | JOIN sys.columns c 15 | ON t.OBJECT_ID = c.OBJECT_ID 16 | JOIN sys.types ty 17 | ON c.user_type_id = ty.user_type_id 18 | LEFT OUTER JOIN sys.index_columns ic 19 | ON ic.object_id = c.object_id 20 | AND ic.column_id = c.column_id 21 | LEFT OUTER JOIN sys.indexes i 22 | ON ic.object_id = i.object_id 23 | AND ic.index_id = i.index_id 24 | WHERE 1 = 1 25 | --AND c.name LIKE '%YourColumnName%' 26 | ORDER BY [Table Name], [Column Name] -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/05 Helper Scripts/troubleshooting.sql: 
-------------------------------------------------------------------------------- 1 | --View information about current users, sessions, and processes 2 | EXEC sp_who2 3 | 4 | --see how long it will take for 5 | --a rollback to complete 6 | KILL [SPID] WITH STATUSONLY 7 | --Example 8 | KILL 59 WITH STATUSONLY 9 | 10 | --Kill open transactions 11 | DBCC opentran 12 | KILL [SPID] 13 | --Example 14 | KILL 59 15 | 16 | -------------------------------------------------------------------------------- /Sample Scripts/01 SQL/05 Helper Scripts/utils.sql: -------------------------------------------------------------------------------- 1 | --Create Schema 2 | CREATE SCHEMA SchemaName 3 | 4 | GO 5 | 6 | --Change collation to something case sensitive 7 | USE master; 8 | 9 | ALTER DATABASE [your database name] SET SINGLE_USER WITH ROLLBACK IMMEDIATE 10 | 11 | ALTER DATABASE [your database name] COLLATE Latin1_General_CS_AS ; 12 | 13 | 14 | ALTER DATABASE [your database name] SET MULTI_USER WITH ROLLBACK IMMEDIATE 15 | 16 | 17 | --Verify the collation setting. 18 | SELECT name, collation_name 19 | FROM sys.databases 20 | WHERE name = N'[your database name]'; 21 | GO 22 | 23 | --Alter collation at the column level 24 | ALTER TABLE [table name] ALTER COLUMN [column name] [your data type] COLLATE Latin1_General_CS_AS 25 | -------------------------------------------------------------------------------- /Sample Scripts/02 Python/Building Blocks/CombineCSVsIntoOneFile.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import glob 3 | import os 4 | 5 | root_directory = #populated from configuration 6 | app_directory = #populated from configuration 7 | source_directory = #as specified 8 | target_directory = #as specified 9 | output_file_name = 'YourFileName.csv' 10 | 11 | source_path = os.path.join(root_directory, app_directory, source_directory) 12 | target_path = os.path.join(root_directory, app_directory, target_directory, output_file_name) 13 | 14 | #change file extension if necessary 15 | source_files = glob.glob(source_path + '*.csv') 16 | 17 | with open(target_path, 'wb') as outfile: 18 | for i, fname in enumerate(source_files): 19 | with open(fname, 'rb') as infile: 20 | if i != 0: 21 | infile.readline() # Throw away header on all but first file 22 | 23 | shutil.copyfileobj(infile, outfile) # Block copy rest of file from input to output without parsing 24 | 25 | # clean input directory 26 | for i, fname in enumerate(source_files): 27 | os.remove(fname) -------------------------------------------------------------------------------- /Sample Scripts/02 Python/Building Blocks/ConvertCommaToPipeDelimitedFile.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | url = 'https://query.data.world/s/paxejv4t6pn3el4ttskmx7rhxhz5ho' 5 | script_dir = os.getcwd() 6 | data_directory = 'data\\' 7 | example_directory = 'PipeDelimitedExample\\' 8 | file_name = 'flights.csv' 9 | file_path = os.path.join(script_dir,data_directory,example_directory,file_name) 10 | 11 | # Read csv data into pandas and write | delimited txt file 12 | df = pd.read_csv(url,index_col=False, dtype=str) 13 | df.to_csv(file_path, sep="|",index=False) -------------------------------------------------------------------------------- /Sample Scripts/02 Python/Building Blocks/ConvertExcelToCSV.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | 5
| sourcePath = #as specified 6 | destinationPath = #as specified 7 | 8 | for path, directories, files in os.walk(sourcePath): 9 | for fn in files: 10 | print("Reading Excel file: {}".format(fn)) 11 | 12 | df = pd.read_excel(os.path.join(sourcePath, fn), sheet_name="DATASET") 13 | 14 | FullPath = destinationPath + "/" + os.path.splitext(fn)[0] + ".csv" 15 | 16 | print("Writing txt file to: {}".format(FullPath)) 17 | 18 | df.to_csv(FullPath, sep="|", index=False) 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /Sample Scripts/02 Python/Building Blocks/LoopingOverFilesInADirectory.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | #Loop Over Everything In Folder 4 | script_dir = os.getcwd() 5 | data_directory = 'data\\' 6 | example_directory = 'FileLoopExample\\' 7 | path = os.path.join(script_dir,data_directory,example_directory) 8 | 9 | for filename in os.listdir(path): 10 | print(filename) 11 | 12 | 13 | #Loop Over Files With A Specific File Extention 14 | script_dir = os.getcwd() 15 | data_directory = 'data\\' 16 | example_directory = 'FileLoopExample\\' 17 | path = os.path.join(script_dir,data_directory,example_directory) 18 | 19 | for filename in os.listdir(path): 20 | if filename.endswith('.csv'): 21 | print(filename) 22 | 23 | #Loop Over Files In Subdirectories Recursively 24 | import os 25 | 26 | script_dir = os.getcwd() 27 | data_directory = 'data\\' 28 | example_directory = 'FileLoopExample\\' 29 | path = os.path.join(script_dir,data_directory,example_directory) 30 | 31 | for subdir, dirs, files in os.walk(path): 32 | for filename in files: 33 | print(filename) 34 | -------------------------------------------------------------------------------- /Sample Scripts/02 Python/Building Blocks/ProcessZipFile.py: -------------------------------------------------------------------------------- 1 | import urllib.request 2 | import os 3 | from pyunpack import Archive 4 | 5 | url = 'https://query.data.world/s/vb53nuuux6umwmccbwlajvlzttmz3q' 6 | script_dir = os.getcwd() 7 | file_name = 'Eurostat.zip' 8 | data_directory = 'data\\' 9 | example_directory = 'ZipFileExample\\' 10 | abs_file_path = os.path.join(script_dir, data_directory, example_directory, file_name) 11 | abs_directory_path = os.path.join(script_dir, data_directory, example_directory) 12 | 13 | with urllib.request.urlopen(url) as dl_file: 14 | with open(abs_file_path, 'wb') as out_file: 15 | out_file.write(dl_file.read()) 16 | 17 | Archive(abs_file_path).extractall(abs_directory_path) 18 | os.remove(abs_file_path) -------------------------------------------------------------------------------- /Sample Scripts/02 Python/Building Blocks/QueryDatabaseAndWriteLargeFile.py: -------------------------------------------------------------------------------- 1 | import pyodbc as db 2 | import os 3 | import time 4 | import csv 5 | 6 | script_dir = os.getcwd() 7 | data_directory = 'data\\' 8 | example_directory = 'WriteLargeFilesExample\\' 9 | file_name = 'EurostatDataOut.csv' 10 | target_path = os.path.join(script_dir, data_directory, example_directory, file_name) 11 | 12 | sql = 'SELECT * FROM EurostatData' 13 | 14 | # Set up the connection. 
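# Note: 'DSN=ETL;' below assumes an ODBC data source named ETL is already configured on the host; rows are streamed from the cursor straight into the csv writer, so even very large result sets never have to fit in memory.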
15 | print('Connecting to SQL Server database' + time.strftime(' %H:%M:%S')) 16 | connection_string = 'DSN=ETL;' 17 | conn = db.connect(connection_string) 18 | print('Preparing database for update' + time.strftime(' %H:%M:%S')) 19 | csr = conn.cursor() 20 | csr.execute(sql) 21 | 22 | with open(target_path, 'w', newline='') as f: 23 | writer = csv.writer(f, delimiter='|', quoting=csv.QUOTE_NONE) 24 | writer.writerow([x[0] for x in csr.description]) 25 | for row in csr: 26 | writer.writerow(row) 27 | 28 | print('Processing file {} complete.'.format(file_name) + time.strftime(' %H:%M:%S')) 29 | conn.commit() 30 | csr.close() 31 | conn.close() -------------------------------------------------------------------------------- /Sample Scripts/02 Python/Building Blocks/QueryDatabaseAndWriteSmallFile.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pandas as pd 3 | import pyodbc as db 4 | 5 | #Constants used to process data files 6 | #Existing paths are examples only! 7 | 8 | 9 | kFileDestination = 'C:/InterfaceAndExtractFiles/../Out/data.csv' #full path to processing Out folder 10 | 11 | #small amounts of data 12 | print("Starting: Processing Data") 13 | 14 | #alter the below for your file. 15 | try: 16 | print("Connecting to SQL Server database") 17 | connection_string = 'DSN=ETL;' 18 | conn = db.connect(connection_string) 19 | csr = conn.cursor() 20 | sql = "SELECT * FROM [table or view]" 21 | df = pd.read_sql(sql,conn) 22 | conn.commit() 23 | csr.close() 24 | conn.close() 25 | except Exception as e: 26 | print(e) 27 | 28 | #Write File 29 | df.to_csv(kFileDestination, sep="|",index=False) 30 | 31 | 32 | # Script Complete 33 | print("Complete: Processing Data") 34 | -------------------------------------------------------------------------------- /Sample Scripts/02 Python/Building Blocks/SendEmail.py: -------------------------------------------------------------------------------- 1 | import smtplib 2 | from email.message import EmailMessage 3 | import yaml 4 | 5 | with open('global_config/config.yaml') as settings: 6 | cfg = yaml.safe_load(settings) 7 | 8 | 9 | from_address = (cfg['from_address']) 10 | to_address = (cfg['to_address']) 11 | password = (cfg['password']) 12 | smtp_server = (cfg['smtp_server']) 13 | smtp_port = (cfg['smtp_port']) 14 | 15 | 16 | def send_exception_email(exchange_directory): 17 | msg = EmailMessage() 18 | msg['From'] = from_address 19 | msg['To'] = to_address 20 | msg['Subject'] = 'Empty Directory In EOD Data' 21 | msg.set_content('There are no files in ' + exchange_directory) 22 | 23 | try: 24 | server = smtplib.SMTP_SSL(smtp_server, smtp_port) 25 | server.login(from_address, password) 26 | server.send_message(msg) 27 | server.quit() 28 | except TimeoutError as e: 29 | print(str(e)) -------------------------------------------------------------------------------- /Sample Scripts/02 Python/Building Blocks/StringMatching.py: -------------------------------------------------------------------------------- 1 | from fuzzywuzzy import fuzz 2 | from fuzzywuzzy import process 3 | import sys 4 | import pandas as pd 5 | import pyodbc as db 6 | 7 | try: 8 | print("Connecting to SQL Server database") 9 | connection_string = 'DSN=ETL;' 10 | conn = db.connect(connection_string) 11 | csr = conn.cursor() 12 | sql = "SELECT TOP 100 CONCAT([FirstName], '|', [MiddleName], '|', [LastName]) AS String FROM DimCustomer" 13 | sql2 = "SELECT TOP 100 CONCAT([FirstName], '|', [MiddleName], '|', [LastName]) AS String FROM DimCustomer ORDER BY NEWID()"
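# The first query pulls the concatenated names in table order; the second pulls the same names shuffled with ORDER BY NEWID(), so the two frames no longer line up row for row and the fuzzy matcher has real work to do.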
14 | df1 = pd.read_sql(sql,conn) 15 | df2 = pd.read_sql(sql2,conn) 16 | conn.commit() 17 | csr.close() 18 | conn.close() 19 | except: 20 | sys.exit("ERROR: Unable to query table or write file") 21 | 22 | #print(df1['String']) 23 | 24 | def fuzzywuzzy_match(x, choices, scorer, cutoff): 25 | return process.extractOne(x, choices=choices, scorer=scorer, score_cutoff=cutoff) 26 | 27 | FuzzyWuzzyResults = df1['String'].apply(fuzzywuzzy_match, args=(df2['String'], fuzz.token_set_ratio, 90)) 28 | 29 | print(FuzzyWuzzyResults) -------------------------------------------------------------------------------- /Sample Scripts/02 Python/Building Blocks/YAMLConfigImport.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | 3 | with open('config.yaml') as settings: 4 | cfg = yaml.safe_load(settings) 5 | 6 | variable_name = cfg['config_name'] -------------------------------------------------------------------------------- /Sample Scripts/02 Python/Full Solutions/DownloadMoveAndStoreDataCSV.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import urllib.request 4 | import datetime 5 | import pandas as pd 6 | import pyodbc as db 7 | import zipfile as zf 8 | 9 | #The following four variables should be 10 | #populated by configuration 11 | FileExchangeRootDirectory = '' 12 | WarehouseProcessingRootDirectory = '' 13 | BusinessDirectory = '' 14 | ProcessDirectory = '' 15 | 16 | DataDirectory = 'In\\' 17 | ArchiveDirectory = 'Archive\\' 18 | FileName = '' 19 | fileTimeStampedTXT = '' 20 | 21 | FileURL = 'https://url.com/data.csv' #URL of File 22 | SiteURL = 'https://url.com' #URL of Site 23 | 24 | 25 | FullSourcePath = os.path.join(FileExchangeRootDirectory, BusinessDirectory, ProcessDirectory, DataDirectory, FileName) 26 | DestinationDirectory = os.path.join(WarehouseProcessingRootDirectory, BusinessDirectory, ProcessDirectory, DataDirectory) 27 | ArchiveDestinationDirectory = os.path.join(WarehouseProcessingRootDirectory, BusinessDirectory, ProcessDirectory, ArchiveDirectory) 28 | 29 | 30 | print("Starting: Processing Data") 31 | 32 | # verify that the site is available and the internet connection is working 33 | try: 34 | print("Validating status of site and internet connection") 35 | urllib.request.urlopen(SiteURL) 36 | except Exception as e: 37 | print(e) 38 | 39 | # download the file 40 | try: 41 | print("Downloading file to:", FullSourcePath) 42 | urllib.request.urlretrieve(FileURL, FullSourcePath) 43 | except Exception as e: 44 | print(e) 45 | 46 | 47 | # Read csv data into pandas and write | delimited txt file 48 | try: 49 | print("Reading csv file: {}".format(FullSourcePath)) 50 | df = pd.read_csv(FullSourcePath,index_col=False,dtype=str) 51 | downloadTimeStamp = datetime.datetime.today().strftime('%Y%m') 52 | print("Setting download timestamp") 53 | df['AsOfMonth'] = downloadTimeStamp 54 | fileTimeStampedTXT = DestinationDirectory + FileName + downloadTimeStamp + "_data.txt" 55 | print("Writing txt file to: {}".format(fileTimeStampedTXT)) 56 | df.to_csv(fileTimeStampedTXT, sep="|",index=False) 57 | except Exception as e: 58 | print(e) 59 | 60 | # delete csv file 61 | try: 62 | print("Deleting csv file: {}".format(FullSourcePath)) 63 | if os.path.isfile(FullSourcePath): 64 | os.remove(FullSourcePath) 65 | except Exception as e: 66 | print(e) 67 | 68 | #alter the below for your file.
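# The block below follows the framework's staging pattern: clear rows already flagged Processed = 1, drop the nonclustered index so the load is not slowed by index maintenance, BULK INSERT the pipe-delimited file through the stage table's view, then rebuild the index. [stage table], [stage table view], and [index name] are placeholders to swap for your own objects.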
69 | #index table only if necessary 70 | #bulk load txt file to SQL Server 71 | try: 72 | print("Connecting to SQL Server database") 73 | connection_string = 'DSN=ETL;' 74 | conn = db.connect(connection_string) 75 | print("Preparing database for update") 76 | csr = conn.cursor() 77 | csr.execute("DELETE FROM [stage table] WHERE Processed = 1") 78 | csr.execute("DROP INDEX IF EXISTS [index name] ON [stage table]") 79 | sql = "BULK INSERT [stage table view] FROM '" + fileTimeStampedTXT + "' WITH (FIELDTERMINATOR = '|', ROWTERMINATOR = '0x0a', FIRSTROW = 2)" 80 | print("Update database with {} file data.".format(fileTimeStampedTXT)) 81 | csr.execute(sql) 82 | print("Creating index on SQL table") 83 | csr.execute("CREATE NONCLUSTERED INDEX [index name] ON [stage table]([column name])") 84 | print("Completing SQL Server update") 85 | conn.commit() 86 | csr.close() 87 | conn.close() 88 | except Exception as e: 89 | print(e) 90 | 91 | # zip txt file to archive 92 | try: 93 | zipFile = ArchiveDestinationDirectory + FileName + downloadTimeStamp + "data.zip" 94 | print("Creating zip file for txt file archive") 95 | archive = zf.ZipFile(zipFile, "w") 96 | archive.write(fileTimeStampedTXT, os.path.basename(fileTimeStampedTXT)) 97 | archive.close() 98 | except Exception as e: 99 | print(e) 100 | 101 | # delete txt file 102 | try: 103 | print("Deleting txt file: {}".format(fileTimeStampedTXT)) 104 | if os.path.isfile(fileTimeStampedTXT): 105 | os.remove(fileTimeStampedTXT) 106 | except Exception as e: 107 | print(e) 108 | 109 | # Script Complete 110 | print("Complete: Processing Data") 111 | -------------------------------------------------------------------------------- /Sample Scripts/02 Python/Full Solutions/LoadLargeCSVsIntoDataWarehouseStagingTables.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pyodbc as db 3 | import time 4 | 5 | #The following three variables should be 6 | #populated by configuration 7 | WarehouseProcessingRootDirectory = '' 8 | BusinessDirectory = '' 9 | ProcessDirectory = '' 10 | 11 | DataDirectory = 'In\\' 12 | FileName = '' 13 | 14 | FullSourcePath = os.path.join(WarehouseProcessingRootDirectory, BusinessDirectory, ProcessDirectory, DataDirectory, FileName) 15 | 16 | #Build SQL Statements 17 | drop_index_sql = 'ALTER TABLE [YourSchema].[YourStageTable] DROP CONSTRAINT [PK_YourStageTable] WITH ( ONLINE = OFF )' 18 | 19 | add_index_sql = 'ALTER TABLE [YourSchema].[YourStageTable] ADD CONSTRAINT [PK_YourStageTable] PRIMARY KEY CLUSTERED' 20 | add_index_sql = add_index_sql + '([ETLKey] ASC) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF,' 21 | add_index_sql = add_index_sql + 'SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON,' 22 | add_index_sql = add_index_sql + 'ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]' 23 | 24 | sql = "BULK INSERT YourStageTableView FROM '" + FullSourcePath + "' WITH (FIELDTERMINATOR = '|', ROWTERMINATOR = '0x0a', FIRSTROW = 2, TABLOCK, BATCHSIZE = 100000)" 25 | 26 | #Set up the connection.
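# For very large files the pattern changes slightly: truncate the stage table, drop the clustered primary key, and BULK INSERT with TABLOCK and BATCHSIZE = 100000 so the load can be minimally logged and committed in chunks, then re-add the key once the file is in.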
27 | print('Connecting to SQL Server database' + time.strftime(' %H:%M:%S')) 28 | connection_string = 'DSN=ETL;' 29 | conn = db.connect(connection_string) 30 | print('Preparing database for update' + time.strftime(' %H:%M:%S')) 31 | csr = conn.cursor() 32 | 33 | #now let's load the file 34 | print('Begin processing {}.'.format(FileName) + time.strftime(' %H:%M:%S')) 35 | csr.execute('TRUNCATE TABLE [YourSchema].[YourStageTable]') 36 | csr.execute(drop_index_sql) 37 | print('Updating staging') 38 | csr.execute(sql) 39 | csr.execute(add_index_sql) 40 | print('Processing file {} complete.'.format(FileName) + time.strftime(' %H:%M:%S')) 41 | conn.commit() 42 | csr.close() 43 | conn.close() 44 | 45 | print('Complete: Processing Data' + time.strftime(' %H:%M:%S')) -------------------------------------------------------------------------------- /Sample Scripts/02 Python/Full Solutions/MoveAndStoreDataExcel.py: -------------------------------------------------------------------------------- 1 | #See documentation for use of this script. 2 | import os 3 | import sys 4 | import datetime 5 | import pandas as pd 6 | import pyodbc as db 7 | import zipfile as zf 8 | 9 | #The following four variables should be 10 | #populated by configuration 11 | FileExchangeRootDirectory = '' 12 | WarehouseProcessingRootDirectory = '' 13 | BusinessDirectory = '' 14 | ProcessDirectory = '' 15 | 16 | DataDirectory = 'In\\' 17 | ArchiveDirectory = 'Archive\\' 18 | FileName = '' 19 | fileTimeStampedTXT = '' 20 | 21 | FullSourcePath = os.path.join(FileExchangeRootDirectory, BusinessDirectory, ProcessDirectory, DataDirectory, FileName) 22 | DestinationDirectory = os.path.join(WarehouseProcessingRootDirectory, BusinessDirectory, ProcessDirectory, DataDirectory) 23 | ArchiveDestinationDirectory = os.path.join(WarehouseProcessingRootDirectory, BusinessDirectory, ProcessDirectory, ArchiveDirectory) 24 | 25 | print("Starting: Processing data") 26 | 27 | # Check if required file exists 28 | if not os.path.isfile(FullSourcePath): 29 | sys.exit("ERROR: Unable to find file {}".format(FullSourcePath)) 30 | 31 | # Read excel data into pandas and write | delimited txt file 32 | try: 33 | print("Reading Excel file: {}".format(FullSourcePath)) 34 | df = pd.read_excel(FullSourcePath, sheet_name="DATASET") 35 | # set timestamp for file processing 36 | print("Setting process timestamp") 37 | processTimeStamp = datetime.datetime.today().strftime('%Y%m%d') 38 | # Create txt filename with timestamp 39 | fileTimeStampedTXT = DestinationDirectory + processTimeStamp + "_data.txt" 40 | print("Writing txt file to: {}".format(fileTimeStampedTXT)) 41 | df.to_csv(fileTimeStampedTXT, sep="|", index=False) 42 | except Exception as e: 43 | print(e) 44 | 45 | # delete xlsx file 46 | try: 47 | print("Deleting xlsx file: {}".format(FullSourcePath)) 48 | if os.path.isfile(FullSourcePath): 49 | os.remove(FullSourcePath) 50 | except Exception as e: 51 | print(e) 52 | 53 | # bulk load txt file to SQL Server 54 | try: 55 | print("Connecting to SQL Server database") 56 | connection_string = 'DSN=ETL;' 57 | conn = db.connect(connection_string) 58 | print("Preparing database for update") 59 | csr = conn.cursor() 60 | csr.execute("DELETE FROM [stage table] WHERE Processed = 1") 61 | print("Preparing bulk insert update") 62 | sql = "BULK INSERT [stage table view] FROM '" + fileTimeStampedTXT + "' WITH (FIELDTERMINATOR = '|', ROWTERMINATOR = '0x0a', FIRSTROW = 2)" 63 | print("Update database with {} file data.".format(fileTimeStampedTXT)) 64 | csr.execute(sql) 65 | conn.commit() 66 | csr.close() 67 | conn.close() 68 | except Exception as e: 69 | print(e) 70 | 71 | # zip txt file to archive 72 | try: 73 | zipFile = ArchiveDestinationDirectory + processTimeStamp + "__data.zip" 74 | print("Creating zip file for txt file archive") 75 | archive = zf.ZipFile(zipFile, "w") 76 | archive.write(fileTimeStampedTXT, os.path.basename(fileTimeStampedTXT)) 77 | archive.close() 78 | except Exception as e: 79 | print(e) 80 | 81 | # delete txt file 82 | try: 83 | print("Deleting txt file: {}".format(fileTimeStampedTXT)) 84 | if os.path.isfile(fileTimeStampedTXT): 85 | os.remove(fileTimeStampedTXT) 86 | except Exception as e: 87 | print(e) 88 | 89 | # Script Complete 90 | print("Complete: Processing Data")
-------------------------------------------------------------------------------- /Sample Scripts/02 Python/Full Solutions/ReloadFromArchive.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import glob 4 | import pyodbc as db 5 | import zipfile as zf 6 | 7 | #The following three variables should be 8 | #populated by configuration 9 | WarehouseProcessingRootDirectory = '' 10 | BusinessDirectory = '' 11 | ProcessDirectory = '' 12 | 13 | DataDirectory = 'In\\' 14 | ArchiveDirectory = 'Archive\\' 15 | FileName = '' 16 | fileTimeStampedTXT = '' 17 | 18 | DestinationDirectory = os.path.join(WarehouseProcessingRootDirectory, BusinessDirectory, ProcessDirectory, DataDirectory) 19 | ArchiveDestinationDirectory = os.path.join(WarehouseProcessingRootDirectory, BusinessDirectory, ProcessDirectory, ArchiveDirectory) 20 | 21 | print("Starting: Processing ReloadFromArchive") 22 | 23 | # get list of zip files 24 | try: 25 | print("Getting zip files from archive directory {}".format(ArchiveDestinationDirectory)) 26 | zipFiles = glob.glob(ArchiveDestinationDirectory + "*.zip", recursive=False) 27 | except Exception as e: 28 | print(e) 29 | 30 | 31 | # Report number of zip files found 32 | print("{} zip files found".format(len(zipFiles))) 33 | 34 | # unzip each file in zipFiles 35 | for zFile in zipFiles: 36 | try: 37 | print("Unzipping file {}".format(zFile)) 38 | zip_ref = zf.ZipFile(zFile, 'r') 39 | zip_ref.extractall(DestinationDirectory) 40 | zip_ref.close() 41 | except Exception as e: 42 | print(e) 43 | 44 | # get list of txt files in C:/InterfaceAndExtractFiles/../In/ 45 | try: 46 | print("Getting txt files from directory {}".format(DestinationDirectory)) 47 | txtFiles = glob.glob(DestinationDirectory + "*.txt", recursive=False) 48 | except Exception as e: 49 | print(e) 50 | 51 | # Report number of txt files found 52 | print("{} txt files found".format(len(txtFiles))) 53 | 54 | # Create database connection 55 | try: 56 | print("Connecting to SQL Server database") 57 | connection_string = 'DSN=ETL;' 58 | conn = db.connect(connection_string) 59 | except Exception as e: 60 | print(e) 61 | 62 | # preparing SQL Server 63 | try: 64 | print("Preparing database for update") 65 | csr = conn.cursor() 66 | csr.execute("DELETE FROM [stage table] WHERE Processed = 1") 67 | csr.execute("DROP INDEX IF EXISTS [index name] ON [stage table]") 68 | conn.commit() 69 | csr.close() 70 | except Exception as e: 71 | print(e) 72 | 73 | # creating counters 74 | txtFileCount = len(txtFiles) 75 | n = 1 76 | 77 | # process each txt file 78 | for tFile in txtFiles: 79 | try: 80 | print("Processing file {} of {}: Update database with {} file data.".format(n, txtFileCount, tFile)) 81 | n += 1 82 | csr = conn.cursor() 83 | sql = "BULK INSERT [stage table view] FROM '" + tFile + "' WITH (FIELDTERMINATOR = '|', ROWTERMINATOR = '0x0a', FIRSTROW = 2)" 84 | csr.execute(sql) 85 | conn.commit() 86 | csr.close() 87 | except Exception as e: 88 | print(e) 89 | 90 | try: 91 | print("Deleting {} file".format(tFile)) 92 | if os.path.isfile(tFile): 93 | os.remove(tFile) 94 | except Exception as e: 95 | print(e) 96 | 97 | # Complete SQL Server processing 98 | try: 99 |
print("Completing SQL Server update") 100 | print("Creating index on SQL table") 101 | csr = conn.cursor() 102 | csr.execute("CREATE NONCLUSTERED INDEX [index name] ON [stage table]([column name])") 103 | print("Completing SQL Server update") 104 | conn.commit() 105 | csr.close() 106 | conn.close() 107 | except Exception as e: 108 | print(e) 109 | 110 | # Complete 111 | print("COMPLETE: Process Reload from Archive") -------------------------------------------------------------------------------- /Sample Scripts/02 Python/Jupyter Notebooks/PotentialMDMProcess.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 18, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Connecting to SQL Server database\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "import sys\n", 18 | "import pandas as pd\n", 19 | "import pyodbc as db\n", 20 | "import hdbscan\n", 21 | "from sklearn.datasets import make_blobs\n", 22 | "\n", 23 | "\n", 24 | "try:\n", 25 | " print(\"Connecting to SQL Server database\")\n", 26 | " connection_string = 'DSN=ETL;'\n", 27 | " conn = db.connect(connection_string)\n", 28 | " csr = conn.cursor()\n", 29 | " sql = \"SELECT FirstName FROM DimCustomer\"\n", 30 | " df1 = pd.read_sql(sql,conn)\n", 31 | " conn.commit()\n", 32 | " csr.close()\n", 33 | " conn.close()\n", 34 | "except:\n", 35 | " sys.exit(\"ERROR: Unable to query table or write file\")" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 19, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "from sklearn.feature_extraction.text import CountVectorizer\n", 45 | "from sklearn import model_selection, preprocessing, linear_model, naive_bayes, metrics, svm\n", 46 | "from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer\n", 47 | "from sklearn import decomposition, ensemble\n", 48 | "\n", 49 | "vectorizer = CountVectorizer()\n", 50 | "X = vectorizer.fit_transform(df1['FirstName'])\n", 51 | "clusterer = hdbscan.HDBSCAN(min_cluster_size=10)\n", 52 | "cluster_labels = clusterer.fit_predict(X)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 16, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "data": { 62 | "text/plain": [ 63 | "" 64 | ] 65 | }, 66 | "execution_count": 16, 67 | "metadata": {}, 68 | "output_type": "execute_result" 69 | }, 70 | { 71 | "data": { 72 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX8AAADxCAYAAAAjibd7AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAbKklEQVR4nO3df5xldX3f8debZSnKD+lDVh9mYQQNiJhKQwfQxkTAoAvVUvwRUKsJilsasI2xBB4xyqz2h0rSqEFcJ4iIJtIqBBZd2fqzYHFlFxOXH4pdMYUFG3ZBheIP2Jl3/zhn8O4wc8+Zu/fcOXfu+/l4nMfMOef7Ped7lwef+53vT9kmIiJGyx6LXYCIiBi8BP+IiBGU4B8RMYIS/CMiRlCCf0TECErwj4gYQQn+EREDIukySfdLum2e+5L0IUlbJW2RdHRTZUnwj4gYnMuBVV3unwwcVh6rgY80VZAE/4iIAbF9A/BglySnAle4sBE4QNIzmijLnk08tEmrVq3y9ddfv9jFiIjhoN19wMtO2McPPDhVK+0tW35xO/DzjkuTticX8LqVwD0d59vKaz9cwDNqGbrgv2PHjsUuQkSMkAcenOLmDWO10i57xv/+ue3x3XjdXF9WjazBM3TBPyJikAxMMz2o120DDu44Pwi4r4kXpc0/IqILYx7zVK2jD9YBbyxH/bwA+Intvjf5QGr+ERGV+lXzl/Rp4HjgQEnbgAuB5QC21wLrgVOArcBPgTP78uI5JPhHRHRhzFSflr63/dqK+wbO6cvLKiT4R0RUmG6mz3VRJfhHRHRhYCrBPyJi9CzFmn9jo32q1rDoSHeMpClJr26qLBERvTLwmF3rGCZN1vwvBy4GrpgvgaRlwPuADU0V4rTjX8M1//OzTT0+IgboXecdwZr3f2eg7zReks0+jdX8a6xhAfBW4Crg/qbK8f9uANtMbT+Nqe2nYZvf1quxvcvv/bjW1HNnX+v8LDO/d16bK92wftaqzz+I97fls47Cf+uZn3N91qntp3HhHz23qVAxP8NUzWOYLNokL0krgdOAtTXSrpa0WdLm7du3N1+4iIhSMcO33jFMFnOG7weA8+3qaXG2J22P2x5fsWLFAIoWETFDTNU8hslijvYZB66UBHAgcIqknbavWcQyRUTsoujwHa7AXseiBX/bh878Luly4HMJ/BHRNsU4/wT/2mqsYRERMRSmU/OvzxVrWMxK+3tNlSMiYnek5h8RMYKMmFqCq98n+EdEVEizT0TEiDHiUS9b7GL0XYJ/REQXxSSvNPtERIycdPhGRIwYW0w5Nf+IiJEznZp/RMRoKTp8l16oXHqfKCKij9LhGxExoqYyzj8iYrRkhm9ExIiazmifiIjRUizsluAfETFSjHgsyztERIwWm0zyiogYPcokr4iIUWNS84+IGEnp8I2IGDFGS3Izl8a+ziRdJul+SbfNc//1kraUx02SjmqqLBERvTLwmPesdQyTJv+WuRxY1eX+D4AX234+8B5gssGyRET0SEzVPIZJY19Vtm+QdEiX+zd1nG4EDmqqLBERvTKZ4dukNwNfmO+mpNXAaoCxsbFBlSkiAliaO3kt+teZpBMogv/586WxPWl73Pb4ihUrBle4iBh5tpj2HrWOKpJWSbpT0lZJF8xx/ymSrpP0bUm3SzqzkQ/FItf8JT0fuBQ42fYDi1mWiIi5FB2+u7+8g6RlwIeBk4BtwCZJ62zf0ZHsHOAO26+QtAK4U9Jf2X50twswy6IFf0ljwNXAG2x/b7HKERHRXd/28D0W2Gr7LgBJVwKnAp3B38B+kgTsCzwI7OzHy2drLPhL+jRwPHCgpG3AhcByANtrgXcBTwUuKT4nO22PN1WeiIheFB2+tdv8D5S0ueN80vbMSMaVwD0d97YBx83KfzGwDrgP2A843fb0ggtdQ5OjfV5bcf8s4Kym3h8R0S8LmOG7o0sldq5vEM86fxnwd8CJwLOBL0q60fZDdQtQ16J3+EZEtNnMDN86R4VtwMEd5wdR1PA7nQlc7cJWivlQR/Ttw3RI8I+IqDDNHrWOCpuAwyQdKmkv4AyKJp5OdwMvAZD0dOA5wF19/jhAe8b5R0S0kg2PTe9+Pdn2TknnAhuAZcBltm+XdHZ5fy3FageXS7qVopnofNs7dvvlc0jwj4joomj26U8jie31wPpZ19Z2/H4f8NK+vKxCgn9ERIWlOMM3wT8ioosFDvUcGgn+ERFd9a/Zp00S/CMiKmQP34iIEVOM9tn9tX3aJsE/IqKLpbqNY4J/RESFNPtERIyYto72kbQ38HLgN4FfAX4G3AZ83vbtVfkT/CMiKrRttI+kCeAVwNeAbwL3A3sDhwPvLb8Y3m57y3zPSPCPiOjCFjtbFvyBTbYn5rn3XyU9Dei6522Cf0REhbY1+9j+/OxrkvYA9rX9kO37Kf4amFfrvs4iItpkps2/D0s6952kv5a0v6R9KHYEu1PSeXXyJvhHRFRoa/AHjiw3evlXFAvGjQFvqJMxzT4REV20fJz/cknLKYL/xbYfK7fFrZSaf0REhWlU61gEHwX+HtgHuEHSM4Gf1MnYWPCXdJmk+yXdNs99SfqQpK2Stkg6uqmyRET0yoad03vUOhbBdbZX2j7Ftil2AntTnYxNlvZyYFWX+ycDh5XHauAjDZYlIqJnLW7zv6rzpPwCuLJOxsba/G3fIOmQLklOBa4oC7tR0gGSnmH7h02VKSJiodrY5i/pCOB5wFMkvbLj1v4Uk70qLWaH70rgno7zbeW1JwR/Sasp/jpgbKzrvIWIiL5zy4I/xcbuLwcOoJjpO+Nh4C11HrCYwX+uf03PldD2JDAJMD4+PmeaiIimtG1hN9vXAtdKeqHtb/TyjMUM/tuAgzvODwLuW6SyRETMyW7fDN8OWyX9MXAIHfHcdmWn72IG/3XAuZKuBI4DfpL2/ohoHzG1OCN56rgWuBH4EjC1kIyNBX9JnwaOBw6UtA24EFgOYHstxWy0U4CtwE+BM5sqS0TE7mhhm/+MJ9s+v5eMTY72eW3FfQPnNPX+iIh+aOt6/qXPSTrF9vqFZmzt3zIREa3got2/zrEI/j3FF8DPJD0k6WFJD9XJmLV9IiIqtG20zwzb+/WaN8E/IqILt7DDV9IRtr8737I4tr9V9YwE/4iICovUpNPNH1JMfP2zOe4ZOLHqAQn+EREV2jbax/bq8ucJvT4jwT8ioouiM7ddwX9GuZb/vwV+q7z0NeCjth+rypvgHxFRocVDPT9CMX/qkvL8DeW1s6oyJvhHRFRoYZv/jGNsH9Vx/hVJ366TMcE/IqILI6ZbNtqnw5SkZ9v+PoCkZ1FzmYcE/4iICu2t+HMe8FVJd1GslPxMai6Vk+AfEdFNHzt8Ja0CPggsAy61/d450hwPfICiLX+H7RfPWzT7y5IOo1jfX8B3bf+iTlkS/CMiqvSh6i9pGfBh4CSKJe03SVpn+46ONAdQdN6usn23pKdVPHNv4PeBF5WlvFHSWts/rypPaxuyIiLawlato8KxwFbbd9l+lGKv3V
NnpXkdcLXtu4v3+v6KZ15BsZ3jXwAXA0cCn6zzmVLzj4jowsD0dO1mnwMlbe44nyx3IoS5t649blb+w4Hlkr4G7Ad80PYVXd73nFmjfb6a0T4REf1goH6b/w7b4/Pcq7N17Z7APwNeAjwJ+Iakjba/N88z/1bSC2xvBJB0HPC/6hQ0wT8iokKfxvnX2bp2G8UXyCPAI5JuAI4C5gv+xwFvlHR3eT4GfEfSrRTbpjx/vsIk+EdEVOlP8N8EHCbpUOBe4AyKNv5O1wIXS9oT2IsiuP95l2eu6rUwtYK/pMMppgw/3favSXo+8C9t/8deXxwRMRxqdeZWsr1T0rnABoqhnpfZvl3S2eX9tba/I+l6YAswTTEc9LYuz/w/vZanbs3/LykmE3y0fOEWSX8NJPhHxNLXp1le5XaL62ddWzvr/CLgov68cX51h3o+2fbNs67trMokaZWkOyVtlXTBHPefIuk6Sd+WdLukbOIeEe1i8LRqHYMi6R/t7jPqBv8dkp5N+f0n6dXAD7tl6JjQcDLF2NPXSjpyVrJzgDvKoUrHA38maa/6xY+IGATVPAbmGwCSao3pn0vdZp9zgEngCEn3Aj8A/nVFnscnNJSFnJnQcEdHGgP7SRKwL/AgNf6iiIgYqPYt7rOXpN8F/rmkV86+afvqqgfUCv5lAP9tSfsAe9h+uEa2OhMaLgbWUQx32g843fb07AdJWk2xZRljY2N1ihwR0T/tC/5nA68HDgBeMeuegf4Ef0n/GXi/7R+X5/8YeLvtP+mWbY5rs/8JXwb8HcV+k88GvijpRtsP7ZKpmCE3CTA+Pt6+/wwRsXQtbJLXQNj+OvB1SZttf6yXZ9Rt8z95JvCXL/4RcEpFnjoTGs6kWMfCtrdSNCcdUbNMEREDUWzlWH0sgk9K+neSPlseby23dqxUN/gv6+xdlvQkoKq3+fEJDWUn7hkUTTyd7qaYxoykp1MsS3pXzTJFRAzGtOodg3cJxXIQl5TH0RRzsirV7fD9FPBlSR+n+CPoTcAnumWoM6EBeA9weTkVWcD5tnfULFNExECovY3NzW7jaPv9ZYB+CUWQfo/tDTXydZ3QYPs+4KV1yhARsShMGzt8ZzS/jaPtLwBf6K18ERHDSq3r8O3Q7DaO5TjS9wFP45ezGWx7/56KGxExTFpa8x/ENo7vB15h+zs9ljEiYng9YfZRe5TBfstC89UN/v+QwB8RI6mF4/z7oW7w3yzpvwHXAI//SVFnCnFExLBr42ifclmcg2zfU5l4DnWD//7AT9l1ZE6tKcQREUOvhcHftiVdQzHOf8HqDvXMUssREe2zUdIxtjctNGPd0T57A28GngfsPXPd9psW+sKIiGHTxmaf0gnA2ZL+HniEX47EnHfv3hl1m30+CXyXYiG2d1OsJpcO4IhY+sxiLd1Qx8m9Zqy7ts+v2n4n8IjtTwD/Avgnvb40ImKouOYx6GIVe/geDJxY/v5Tasb1ujX/x8qfP5b0a8D/BQ5ZYDkjIoZSW5t9JF0IjFNM8vo4sJxiLbbfqMpbN/hPlmv4/wnFypz7Au/sqbQREcOmpcEfOA34deBbUKyXJmm/OhnrBv8vl2v43wA8C0DSoT0UNCJi+LQ3+D9aDvmc2V99n7oZ67b5XzXHtc/WfUlExLCS6x+L4L9L+ihwgKS3AF8C/rJOxq41f0lHUAzvfMqsTYL3p2PIZ0TEktbS0T62/1TSScBDwOHAu2x/sU7eqmaf5wAv54mbBD8MvKWHskZEDJ22dviWbgWeRNE4dWvdTF2Dv+1rgWslvdD2N3avfBERQ6qlwV/SWcC7gK9QTPD6C0nvtn1ZVd66Hb6nSbod+BlwPXAU8Ae2P9VjmSMihsPitefXcR7w67YfAJD0VOAmoDL41+3wfanthyiagLZRtC2d11tZIyKGTEsneVHE44c7zh8Gaq3yWbfmv7z8eQrwadsPFquJdidpFfBBig3cL7X93jnSHA98oHzHDtsvrlmmiIiBUMs2c5H0h+Wv9wLflHQtxdfPqcDNdZ5RN/hfJ+m7FM0+vy9pBfDzisItAz4MnETx7bRJ0jrbd3SkOQC4BFhl+25JT6tZnoiIUTYzkev75THj2roPqLuk8wWS3gc8ZHtK0iMU3zDdHAtstX0XgKQryzx3dKR5HXC17bvL99xft+AREQPTsjZ/22t29xlV4/xPtP2VzjH+s5p7um3mspJd2562AcfNSnM4sFzS1yi+yT5o+4o5yrEaWA0wNjbWrcgREf3V4g5fSePAO4Bn0hHP+7Gk829RDCF6BcV3n2b97Bb85+oUmP1PuCfFLjQvoRin+g1JG21/b5dM9iQwCTA+Pt7S/wwRsWS1N+r8FcXgm1tZ4DbzVcH/4bJj4TZ+GfSh3j/FNoqlRmccBNw3R5odth8BHpF0A8Uw0u8REdEW7Q3+222v6yVjVfDft/z5HOAYis4EUfwlcENF3k3AYeUCcPcCZ1C08Xe6FrhY0p7AXhTNQn9eu/QREQ0T7Rvt0+FCSZcCXwZ+MXPRduX+6lUzfNcASPofwNG2Hy7PJ4DPVOTdKelcYAPFUM/LbN8u6ezy/lrb35F0PbCF4k+WS23fVlXoiIiB6WObf53h72W6Y4CNwOm2uy2ieSZwBMVQ+ZmvqKomeaD+UM8x4NGO80epsZmL7fXA+lnX1s46vwi4qGY5IiIGrw/Bv87w945076OoOFc5ynZPuyouZA/fmyX9DcU/w2nAJ3p5YUTE0OlPzb/O8HeAt1Iso39MjWdulHTk7C+QOuqO8/9Pkr4A/GZ56Uzbf7vQl0VEDKMFNPscKGlzx/lkOVoRagx/l7SSonJ9IvWC/4uA35X0A4o2fwHux1DPx9n+FuVWYRERI6V+8N9he3yee3WGv38AOL+cTFvnfatql2yW2sE/ImIkuW+jfeoMfx8HriwD/4HAKZJ22r5m/tL1JsE/IqJKf9r8K4e/2358b3RJlwOf6xL4AT7PL+dg7Q0cCtxJsQNjVwn+EREV+jHUs87w9x6euctIH0lHA/+mTt4E/4iIKn0a519n+HvH9d/r4fnfKucIVErwj4joZvE2aqnUsa4/FJtzHQ1sr5M3wT8iogvR3lU9+eW6/gA7KfoArqqTMcE/IqJCW4P/7qzrn+AfEVGlZcFf0seZv1S2/eaqZyT4R0RUaVnwBz43x7Ux4A8oRhJVSvCPiOimhTt52X68XV/Ss4A/pth8673Ax+o8Y49mihYRsYS45jFAkp4r6VPAdcDXgSNtf8T2oxVZgdT8IyIqtW0zF0mfoVgK4k+BtwFTwP4z6wHZfrDqGQn+EREV2tbsQ7Hip4H/ALy9vNa5ze6zqh6Q4B8R0U0LJ3nZPmR3n5E2/4iIKi1r85d0SMV9STqoW5rU/CMiumjpDN+LJO0BXAvcQrGkw97ArwInAC8BLqRYRnpOjdb8Ja2SdKekrZIu6JLuGElTkl7dZHkiInqhadc6BsX2a4B3As+h2Bf4RmAd8BaKJZ1PtP3Fbs9orObf0GbFERGD1cI2f4Aylr6j1/xNNvs0sVlxRMTAtbDZBwBJr5zj8
k+AW23f3y1vk8G/b5sVS1oNrAYYGxvre0EjIrpqafAH3gy8EPhqeX48sBE4XNK7bX9yvoxNtvkvaLPibg+yPWl73Pb4ihUr+lbAiIg65HrHIpgGnmv7VbZfBRwJ/IKion1+t4xN1vyb2Kw4ImLw2lvzP8T2P3Sc3w8cbvtBSY91y9hk8G9is+KIiMFy+5Z36HCjpM8BnynPXw3cIGkf4MfdMjYW/JvYrDgiYtBaOs5/xjnAK4EXURT1E8BVtk0x3n9ejU7yanqz4oiIgXA7o79tS/o68ChF49TNZeCvlOUdIiIqtLXDV9LvADdTNPf8DvDNupNls7xDREQ3LZ3kVXoHcMzMmH5JK4AvAZ+typjgHxFRocUdvnvMmsz1ADVbdBL8IyIqtDj4Xy9pA/Dp8vx0ZvWzzifBPyKiG9PmDt/zJL0K+A2K0T6Ttv+mTt4E/4iICi0e6jmzmftVlQlnSfCPiKjSsuAv6WHmLpUoRoDuX/WMBP+IiC7aOMnL9n67+4wE/4iIbjzYjVoGJcE/IqLK0ov9Cf4REVXa1uzTDwn+ERHdGEizT0TECFp6sT8Lu0VEVOnXwm6SVkm6U9JWSRfMcf/1kraUx02Sjmri80Bq/hERlfox2kfSMuDDwEkUOx1ukrTO9h0dyX4AvNj2jySdDEwya+/zfknNPyKiGy/g6O5YYKvtu2w/ClwJnLrLq+ybbP+oPN1Isf1tI1Lzj4joopjkVbvmf6CkzR3nk7Yny99XAvd03NtG91r9m4Ev1H3xQiX4R0RUqb+q5w7b4/Pc0xzX5vxWkXQCRfB/Ue03L1CCf0REhQXU/LvZBhzccX4QcN8T3iU9H7gUONn2A/148VwabfNvU892RERP+tfmvwk4TNKhkvYCzgDWdSaQNAZcDbzB9vf69yGeqLGaf9t6tiMietOftX1s75R0LrABWAZcZvt2SWeX99cC7wKeClwiCWBnl2ak3dJks8/jPdsAkmZ6th8P/rZv6kjfaM92RETP+rSZi+31zNppqwz6M7+fBZzVl5dVaLLZZ66e7ZVd0s/bsy1ptaTNkjZv3769j0WMiKjgYhvHOscwaTL499Kzff5c921P2h63Pb5ixYo+FjEioga73jFEmmz2aVXPdkREz4YrrtfSZPB/vGcbuJeiZ/t1nQkG2bMdEdErTQ9Zm04NjQX/tvVsR0T0xCxkktfQaHSSV5t6tiMieiHcr0lerZIZvhERVRL8IyJGUIJ/RMSISZt/RMRoymifiIiRM3wTuOpI8I+I6MYk+EdEjKSl1+qT4B8RUSXj/CMiRlGCf0TEiLFhaum1+yT4R0RUSc0/ImIEJfhHRIwYA33Yw7dtEvwjIroyOG3+ERGjxaTDNyJiJKXNPyJiBCX4R0SMmizsFhExegwswSWd92jy4ZJWSbpT0lZJF8xxX5I+VN7fIunoJssTEdETu94xRBqr+UtaBnwYOAnYBmyStM72HR3JTgYOK4/jgI+UPyMiWmJpLu/QZM3/WGCr7btsPwpcCZw6K82pwBUubAQOkPSMBssUEbEwBnu61jFMlk1MTDTy4DVr1rwAeNrExMS68vxQ4LkTExPrO9KcDayfmJi4uzw/DfjmxMTEfZ3PkrR6zZo1H12zZs3q5cuX/8rb3va2+gUxHHX889CTT0dPPn2Xa7v83o9rTT131rV/esq7H/8sM59rl89Xpu9MN6yfterzD+T9LfmsI/Hfetb/r52f9Qmfu541C80w23vffdHEwXsdUW7q0v34/s+/9cOJiYnJ3X3nIMgNtVNJeg3wMttnledvAI61/daONJ8H/ovtr5fnXwb+yPYt8z13fHzcmzdvbqTMEbHkaHcf8JQ9V/iF+81utJjbhh9/7Bbb47v7zkFocrTPNuDgjvODgPt6SBMRsXjsjPZZoE3AYZIOlbQXcAawblaadcAby1E/LwB+YvuHDZYpImLhMtqnPts7JZ0LbACWAZfZvl3S2eX9tcB64BRgK/BT4MymyhMR0RvjqanFLkTfNTrJy/Z6igDfeW1tx+8GzmmyDBERu8VkSeeIiJE0ZMM462h0hm9ExLAz4GnXOqq0adWDBP+IiG7souZf5+iiY9WDk4EjgddKOnJWss5VD1ZTrHrQiAT/iIgKnpqqdVRo1aoHQ9fmf8stt2wAVi12OSJiNDzMjzZ8yZ89sGbyvSV1zkKdtD0z43clcE/HvW08cS2zudKsBPo+BH7ogr/tBP6IGJg+xpy5ZhvP7iiok6Yv0uwTETEYrVr1IME/ImIwWrXqwdA1+0REDKO2rXrQ2KqeERHRXmn2iYgYQQn+EREjKME/ImIEJfhHRIygBP+IiBGU4B8RMYIS/CMiRtD/B+2FgK+jHrdwAAAAAElFTkSuQmCC\n", 73 | "text/plain": [ 74 | "
" 75 | ] 76 | }, 77 | "metadata": { 78 | "needs_background": "light" 79 | }, 80 | "output_type": "display_data" 81 | } 82 | ], 83 | "source": [ 84 | "clusterer = hdbscan.RobustSingleLinkage(cut=0.125, k=7)\n", 85 | "cluster_labels = clusterer.fit_predict(X)\n", 86 | "hierarchy = clusterer.cluster_hierarchy_\n", 87 | "alt_labels = hierarchy.get_clusters(0.100, 5)\n", 88 | "hierarchy.plot()" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 20, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "data": { 98 | "text/plain": [ 99 | "array([ 82, 83, 122, ..., 444, 386, 43], dtype=int64)" 100 | ] 101 | }, 102 | "execution_count": 20, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "cluster_labels" 109 | ] 110 | } 111 | ], 112 | "metadata": { 113 | "kernelspec": { 114 | "display_name": "Python 3", 115 | "language": "python", 116 | "name": "python3" 117 | }, 118 | "language_info": { 119 | "codemirror_mode": { 120 | "name": "ipython", 121 | "version": 3 122 | }, 123 | "file_extension": ".py", 124 | "mimetype": "text/x-python", 125 | "name": "python", 126 | "nbconvert_exporter": "python", 127 | "pygments_lexer": "ipython3", 128 | "version": "3.8.3" 129 | } 130 | }, 131 | "nbformat": 4, 132 | "nbformat_minor": 4 133 | } 134 | -------------------------------------------------------------------------------- /Sample Scripts/03 PySpark/ConnectToSpark.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pyspark.sql import SparkSession\n", 10 | "\n", 11 | "spark = SparkSession \\\n", 12 | " .builder \\\n", 13 | " .appName(\"Python Spark SQL basic example\") \\\n", 14 | " .master(\"spark://[YourServerIP]\") \\\n", 15 | " .getOrCreate()\n", 16 | "\n", 17 | "from pyspark import SparkConf, SparkContext\n", 18 | "from pyspark.sql import SparkSession, SQLContext\n", 19 | "sc = SparkContext()\n", 20 | "sqlq = SQLContext(sc)" 21 | ] 22 | } 23 | ], 24 | "metadata": { 25 | "kernelspec": { 26 | "display_name": "Python 3", 27 | "language": "python", 28 | "name": "python3" 29 | }, 30 | "language_info": { 31 | "codemirror_mode": { 32 | "name": "ipython", 33 | "version": 3 34 | }, 35 | "file_extension": ".py", 36 | "mimetype": "text/x-python", 37 | "name": "python", 38 | "nbconvert_exporter": "python", 39 | "pygments_lexer": "ipython3", 40 | "version": "3.8.3" 41 | } 42 | }, 43 | "nbformat": 4, 44 | "nbformat_minor": 4 45 | } 46 | -------------------------------------------------------------------------------- /Sample Scripts/03 PySpark/ExampleDataProcessing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pyspark import SparkConf, SparkContext\n", 10 | "from pyspark.sql import SparkSession, SQLContext\n", 11 | "import pyspark.sql.functions as func\n", 12 | "from pyspark.sql.types import *\n", 13 | "sc = SparkContext()\n", 14 | "sqlq = SQLContext(sc)\n", 15 | "import os" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "rdf = sqlq.read.format(\"csv\").option(\"header\",\"true\").option(\"delimiter\",\"|\").option(\"quote\",\"^\").load(\"[Path To Your File]\")" 25 | ] 26 | }, 27 | { 28 | "cell_type": 
"code", 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "tnpdf = sqlq.read.format(\"csv\").option(\"header\",\"true\").option(\"delimiter\",\"|\").option(\"quote\",\"^\").load(\"[Path To Your File]\")" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 4, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "rdf.count()" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 7, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "rdf = rdf.filter(rdf.[YourColumn] != '')" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 5, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "rdf = rdf.filter(rdf.[YourColumn1] != 'SomeValue').filter(rdf.[YourColumn2] != 'SomeOtherValue')" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 11, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "ExcludeList = ['Value1','Value2',...'ValueN']\n", 70 | "rdf = rdf.filter(rdf.[YourColumnName].isin(ExcludeList) == False)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 13, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "joined = rdf.join(tnpdf,func.lower(rdf.[YourJoinColumn]) == func.lower(tnpdf.[YourJoinColumn]), how=\"left\")" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 15, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "no[YourBooleanColumn] = joined.drop('[YourBooleanColumn]')" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 16, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "no[YourBooleanColumn].select(no[YourBooleanColumn].[YourColumnName]).show(n=20)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 17, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "CheckFor[YourBooleanColumn] = func.udf(lambda x: 'No' if x is None else 'Yes', StringType())" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 18, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "with[YourBooleanColumn] = no[YourBooleanColumn].withColumn('[YourBooleanColumn]',func.lit(CheckFor[YourBooleanColumn](joined.[YourColumnName])))" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 19, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "with[YourBooleanColumn].columns" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 22, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "CleanedResults.select('*').where(CleanedResults.[YourColumnName]=='FilterValue').show(n=20)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 23, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "CleanedResults.repartition(1).write.option('nullValue',None).csv('[Path To Your Output File]', sep='|',header='true',mode='overwrite')" 143 | ] 144 | } 145 | ], 146 | "metadata": { 147 | "kernelspec": { 148 | "display_name": "Python 3", 149 | "language": "python", 150 | "name": "python3" 151 | }, 152 | "language_info": { 153 | "codemirror_mode": { 154 | "name": "ipython", 155 | "version": 3 156 | }, 157 | "file_extension": ".py", 158 | "mimetype": "text/x-python", 159 | "name": "python", 160 | "nbconvert_exporter": "python", 161 | "pygments_lexer": "ipython3", 162 | "version": "3.8.3" 163 | } 164 | }, 165 | "nbformat": 4, 166 | "nbformat_minor": 4 167 | } 168 | 
-------------------------------------------------------------------------------- /Sample Scripts/03 PySpark/LoadCSV.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pyspark.sql import SparkSession\n", 10 | "\n", 11 | "spark = SparkSession \\\n", 12 | " .builder \\\n", 13 | " .appName(\"Python Spark SQL basic example\") \\\n", 14 | " .master(\"spark://[YourServerIP]\") \\\n", 15 | " .getOrCreate()" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 1, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "from pyspark import SparkConf, SparkContext\n", 25 | "from pyspark.sql import SparkSession, SQLContext\n", 26 | "sc = SparkContext()\n", 27 | "sqlq = SQLContext(sc)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 15, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "tnpdf = sqlq.read.format(\"csv\").option(\"header\",\"true\").option(\"delimiter\",\"|\").load(\"[Path To Your CSV]\")" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 27, 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/plain": [ 47 | "3057038" 48 | ] 49 | }, 50 | "execution_count": 27, 51 | "metadata": {}, 52 | "output_type": "execute_result" 53 | } 54 | ], 55 | "source": [ 56 | "rdf.count()" 57 | ] 58 | } 59 | ], 60 | "metadata": { 61 | "kernelspec": { 62 | "display_name": "Python 3", 63 | "language": "python", 64 | "name": "python3" 65 | }, 66 | "language_info": { 67 | "codemirror_mode": { 68 | "name": "ipython", 69 | "version": 3 70 | }, 71 | "file_extension": ".py", 72 | "mimetype": "text/x-python", 73 | "name": "python", 74 | "nbconvert_exporter": "python", 75 | "pygments_lexer": "ipython3", 76 | "version": "3.8.3" 77 | } 78 | }, 79 | "nbformat": 4, 80 | "nbformat_minor": 4 81 | } 82 | -------------------------------------------------------------------------------- /Sample Scripts/04 Windows Batch/call Python from SQL Server agent.txt: -------------------------------------------------------------------------------- 1 | python c:\temp\YourPythonScript.py 2 | 3 | SET EXITCODE=%ERRORLEVEL% 4 | IF %EXITCODE% EQU 0 ( 5 | REM Process completed successfully. 6 | EXIT 0 7 | ) 8 | IF %EXITCODE% EQU 1 ( 9 | REM Process error. 10 | EXIT 1 11 | ) 12 | IF %EXITCODE% EQU 2 ( 13 | REM Custom error message. 14 | EXIT 2 15 | ) --------------------------------------------------------------------------------